xref: /openbmc/linux/drivers/infiniband/hw/cxgb4/qp.c (revision e3d786a3)
1 /*
2  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/module.h>
34 
35 #include "iw_cxgb4.h"
36 
37 static int db_delay_usecs = 1;
38 module_param(db_delay_usecs, int, 0644);
39 MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
40 
41 static int ocqp_support = 1;
42 module_param(ocqp_support, int, 0644);
43 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
44 
45 int db_fc_threshold = 1000;
46 module_param(db_fc_threshold, int, 0644);
47 MODULE_PARM_DESC(db_fc_threshold,
48 		 "QP count/threshold that triggers"
49 		 " automatic db flow control mode (default = 1000)");
50 
51 int db_coalescing_threshold;
52 module_param(db_coalescing_threshold, int, 0644);
53 MODULE_PARM_DESC(db_coalescing_threshold,
54 		 "QP count/threshold that triggers"
55 		 " disabling db coalescing (default = 0)");
56 
57 static int max_fr_immd = T4_MAX_FR_IMMD;
58 module_param(max_fr_immd, int, 0644);
59 MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
60 
61 static int alloc_ird(struct c4iw_dev *dev, u32 ird)
62 {
63 	int ret = 0;
64 
65 	spin_lock_irq(&dev->lock);
66 	if (ird <= dev->avail_ird)
67 		dev->avail_ird -= ird;
68 	else
69 		ret = -ENOMEM;
70 	spin_unlock_irq(&dev->lock);
71 
72 	if (ret)
73 		dev_warn(&dev->rdev.lldi.pdev->dev,
74 			 "device IRD resources exhausted\n");
75 
76 	return ret;
77 }
78 
79 static void free_ird(struct c4iw_dev *dev, int ird)
80 {
81 	spin_lock_irq(&dev->lock);
82 	dev->avail_ird += ird;
83 	spin_unlock_irq(&dev->lock);
84 }
85 
86 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
87 {
88 	unsigned long flag;
89 	spin_lock_irqsave(&qhp->lock, flag);
90 	qhp->attr.state = state;
91 	spin_unlock_irqrestore(&qhp->lock, flag);
92 }
93 
94 static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
95 {
96 	c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
97 }
98 
99 static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
100 {
101 	dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
102 			  dma_unmap_addr(sq, mapping));
103 }
104 
105 static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
106 {
107 	if (t4_sq_onchip(sq))
108 		dealloc_oc_sq(rdev, sq);
109 	else
110 		dealloc_host_sq(rdev, sq);
111 }
112 
113 static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
114 {
115 	if (!ocqp_support || !ocqp_supported(&rdev->lldi))
116 		return -ENOSYS;
117 	sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
118 	if (!sq->dma_addr)
119 		return -ENOMEM;
120 	sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
121 			rdev->lldi.vr->ocq.start;
122 	sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
123 					    rdev->lldi.vr->ocq.start);
124 	sq->flags |= T4_SQ_ONCHIP;
125 	return 0;
126 }
127 
128 static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
129 {
130 	sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
131 				       &(sq->dma_addr), GFP_KERNEL);
132 	if (!sq->queue)
133 		return -ENOMEM;
134 	sq->phys_addr = virt_to_phys(sq->queue);
135 	dma_unmap_addr_set(sq, mapping, sq->dma_addr);
136 	return 0;
137 }
138 
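/*
 * For user QPs, try the on-chip SQ pool first and fall back to host memory
 * if on-chip queues are unsupported or exhausted.  Kernel QPs always use
 * host memory.
 */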
139 static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
140 {
141 	int ret = -ENOSYS;
142 	if (user)
143 		ret = alloc_oc_sq(rdev, sq);
144 	if (ret)
145 		ret = alloc_host_sq(rdev, sq);
146 	return ret;
147 }
148 
149 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
150 		      struct c4iw_dev_ucontext *uctx, int has_rq)
151 {
152 	/*
153 	 * uP clears EQ contexts when the connection exits rdma mode,
154 	 * so no need to post a RESET WR for these EQs.
155 	 */
156 	dealloc_sq(rdev, &wq->sq);
157 	kfree(wq->sq.sw_sq);
158 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
159 
160 	if (has_rq) {
161 		dma_free_coherent(&rdev->lldi.pdev->dev,
162 				  wq->rq.memsize, wq->rq.queue,
163 				  dma_unmap_addr(&wq->rq, mapping));
164 		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
165 		kfree(wq->rq.sw_rq);
166 		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
167 	}
168 	return 0;
169 }
170 
171 /*
172  * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL,
173  * then this is a user mapping so compute the page-aligned physical address
174  * for mapping.
175  */
176 void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
177 			      enum cxgb4_bar2_qtype qtype,
178 			      unsigned int *pbar2_qid, u64 *pbar2_pa)
179 {
180 	u64 bar2_qoffset;
181 	int ret;
182 
183 	ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype,
184 				   pbar2_pa ? 1 : 0,
185 				   &bar2_qoffset, pbar2_qid);
186 	if (ret)
187 		return NULL;
188 
189 	if (pbar2_pa)
190 		*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
191 
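	/*
	 * On T4 there is no BAR2 kernel address to return; callers treat a
	 * NULL bar2_va as "use the regular doorbell register".
	 */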
192 	if (is_t4(rdev->lldi.adapter_type))
193 		return NULL;
194 
195 	return rdev->bar2_kva + bar2_qoffset;
196 }
197 
198 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
199 		     struct t4_cq *rcq, struct t4_cq *scq,
200 		     struct c4iw_dev_ucontext *uctx,
201 		     struct c4iw_wr_wait *wr_waitp,
202 		     int need_rq)
203 {
204 	int user = (uctx != &rdev->uctx);
205 	struct fw_ri_res_wr *res_wr;
206 	struct fw_ri_res *res;
207 	int wr_len;
208 	struct sk_buff *skb;
209 	int ret = 0;
210 	int eqsize;
211 
212 	wq->sq.qid = c4iw_get_qpid(rdev, uctx);
213 	if (!wq->sq.qid)
214 		return -ENOMEM;
215 
216 	if (need_rq) {
217 		wq->rq.qid = c4iw_get_qpid(rdev, uctx);
218 		if (!wq->rq.qid) {
219 			ret = -ENOMEM;
220 			goto free_sq_qid;
221 		}
222 	}
223 
224 	if (!user) {
225 		wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq),
226 				       GFP_KERNEL);
227 		if (!wq->sq.sw_sq) {
228 			ret = -ENOMEM;
229 			goto free_rq_qid;//FIXME
230 		}
231 
232 		if (need_rq) {
233 			wq->rq.sw_rq = kcalloc(wq->rq.size,
234 					       sizeof(*wq->rq.sw_rq),
235 					       GFP_KERNEL);
236 			if (!wq->rq.sw_rq) {
237 				ret = -ENOMEM;
238 				goto free_sw_sq;
239 			}
240 		}
241 	}
242 
243 	if (need_rq) {
244 		/*
245 		 * RQT must be a power of 2 and at least 16 deep.
246 		 */
247 		wq->rq.rqt_size =
248 			roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
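		/* e.g. a 100-entry RQ gets a 128-entry RQT; a 10-entry RQ gets 16 */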
249 		wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
250 		if (!wq->rq.rqt_hwaddr) {
251 			ret = -ENOMEM;
252 			goto free_sw_rq;
253 		}
254 	}
255 
256 	ret = alloc_sq(rdev, &wq->sq, user);
257 	if (ret)
258 		goto free_hwaddr;
259 	memset(wq->sq.queue, 0, wq->sq.memsize);
260 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
261 
262 	if (need_rq) {
263 		wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
264 						  wq->rq.memsize,
265 						  &wq->rq.dma_addr,
266 						  GFP_KERNEL);
267 		if (!wq->rq.queue) {
268 			ret = -ENOMEM;
269 			goto free_sq;
270 		}
271 		pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
272 			 wq->sq.queue,
273 			 (unsigned long long)virt_to_phys(wq->sq.queue),
274 			 wq->rq.queue,
275 			 (unsigned long long)virt_to_phys(wq->rq.queue));
276 		memset(wq->rq.queue, 0, wq->rq.memsize);
277 		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
278 	}
279 
280 	wq->db = rdev->lldi.db_reg;
281 
282 	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
283 					 CXGB4_BAR2_QTYPE_EGRESS,
284 					 &wq->sq.bar2_qid,
285 					 user ? &wq->sq.bar2_pa : NULL);
286 	if (need_rq)
287 		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
288 						 CXGB4_BAR2_QTYPE_EGRESS,
289 						 &wq->rq.bar2_qid,
290 						 user ? &wq->rq.bar2_pa : NULL);
291 
292 	/*
293 	 * User mode must have bar2 access.
294 	 */
295 	if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
296 		pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
297 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
298 		goto free_dma;
299 	}
300 
301 	wq->rdev = rdev;
302 	wq->rq.msn = 1;
303 
304 	/* build fw_ri_res_wr */
305 	wr_len = sizeof *res_wr + 2 * sizeof *res;
306 	if (need_rq)
307 		wr_len += sizeof(*res);
308 	skb = alloc_skb(wr_len, GFP_KERNEL);
309 	if (!skb) {
310 		ret = -ENOMEM;
311 		goto free_dma;
312 	}
313 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
314 
315 	res_wr = __skb_put_zero(skb, wr_len);
316 	res_wr->op_nres = cpu_to_be32(
317 			FW_WR_OP_V(FW_RI_RES_WR) |
318 			FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
319 			FW_WR_COMPL_F);
320 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
321 	res_wr->cookie = (uintptr_t)wr_waitp;
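	/*
	 * The cookie identifies wr_waitp when the firmware reply for this
	 * work request arrives.
	 */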
322 	res = res_wr->res;
323 	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
324 	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
325 
326 	/*
327 	 * eqsize is the number of 64B entries plus the status page size.
328 	 */
329 	eqsize = wq->sq.size * T4_SQ_NUM_SLOTS +
330 		rdev->hw_queue.t4_eq_status_entries;
331 
332 	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
333 		FW_RI_RES_WR_HOSTFCMODE_V(0) |	/* no host cidx updates */
334 		FW_RI_RES_WR_CPRIO_V(0) |	/* don't keep in chip cache */
335 		FW_RI_RES_WR_PCIECHN_V(0) |	/* set by uP at ri_init time */
336 		(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) |
337 		FW_RI_RES_WR_IQID_V(scq->cqid));
338 	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
339 		FW_RI_RES_WR_DCAEN_V(0) |
340 		FW_RI_RES_WR_DCACPU_V(0) |
341 		FW_RI_RES_WR_FBMIN_V(2) |
342 		(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) :
343 					 FW_RI_RES_WR_FBMAX_V(3)) |
344 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
345 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
346 		FW_RI_RES_WR_EQSIZE_V(eqsize));
347 	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
348 	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
349 
350 	if (need_rq) {
351 		res++;
352 		res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
353 		res->u.sqrq.op = FW_RI_RES_OP_WRITE;
354 
355 		/*
356 		 * eqsize is the number of 64B entries plus the status page size
357 		 */
358 		eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
359 			rdev->hw_queue.t4_eq_status_entries;
360 		res->u.sqrq.fetchszm_to_iqid =
361 			/* no host cidx updates */
362 			cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
363 			/* don't keep in chip cache */
364 			FW_RI_RES_WR_CPRIO_V(0) |
365 			/* set by uP at ri_init time */
366 			FW_RI_RES_WR_PCIECHN_V(0) |
367 			FW_RI_RES_WR_IQID_V(rcq->cqid));
368 		res->u.sqrq.dcaen_to_eqsize =
369 			cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
370 			FW_RI_RES_WR_DCACPU_V(0) |
371 			FW_RI_RES_WR_FBMIN_V(2) |
372 			FW_RI_RES_WR_FBMAX_V(3) |
373 			FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
374 			FW_RI_RES_WR_CIDXFTHRESH_V(0) |
375 			FW_RI_RES_WR_EQSIZE_V(eqsize));
376 		res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
377 		res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
378 	}
379 
380 	c4iw_init_wr_wait(wr_waitp);
381 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
382 	if (ret)
383 		goto free_dma;
384 
385 	pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
386 		 wq->sq.qid, wq->rq.qid, wq->db,
387 		 wq->sq.bar2_va, wq->rq.bar2_va);
388 
389 	return 0;
390 free_dma:
391 	if (need_rq)
392 		dma_free_coherent(&rdev->lldi.pdev->dev,
393 				  wq->rq.memsize, wq->rq.queue,
394 				  dma_unmap_addr(&wq->rq, mapping));
395 free_sq:
396 	dealloc_sq(rdev, &wq->sq);
397 free_hwaddr:
398 	if (need_rq)
399 		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
400 free_sw_rq:
401 	if (need_rq)
402 		kfree(wq->rq.sw_rq);
403 free_sw_sq:
404 	kfree(wq->sq.sw_sq);
405 free_rq_qid:
406 	if (need_rq)
407 		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
408 free_sq_qid:
409 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
410 	return ret;
411 }
412 
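/*
 * Copy the SGE payloads into the WQE as immediate data, wrapping around the
 * end of the SQ as needed.  Fails with -EMSGSIZE if the total exceeds max.
 */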
413 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
414 		      const struct ib_send_wr *wr, int max, u32 *plenp)
415 {
416 	u8 *dstp, *srcp;
417 	u32 plen = 0;
418 	int i;
419 	int rem, len;
420 
421 	dstp = (u8 *)immdp->data;
422 	for (i = 0; i < wr->num_sge; i++) {
423 		if ((plen + wr->sg_list[i].length) > max)
424 			return -EMSGSIZE;
425 		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
426 		plen += wr->sg_list[i].length;
427 		rem = wr->sg_list[i].length;
428 		while (rem) {
429 			if (dstp == (u8 *)&sq->queue[sq->size])
430 				dstp = (u8 *)sq->queue;
431 			if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
432 				len = rem;
433 			else
434 				len = (u8 *)&sq->queue[sq->size] - dstp;
435 			memcpy(dstp, srcp, len);
436 			dstp += len;
437 			srcp += len;
438 			rem -= len;
439 		}
440 	}
441 	len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
442 	if (len)
443 		memset(dstp, 0, len);
444 	immdp->op = FW_RI_DATA_IMMD;
445 	immdp->r1 = 0;
446 	immdp->r2 = 0;
447 	immdp->immdlen = cpu_to_be32(plen);
448 	*plenp = plen;
449 	return 0;
450 }
451 
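/*
 * Build a firmware immediate SGL in the WQE, wrapping at queue_end.
 * Returns -EMSGSIZE if the total length overflows 32 bits.
 */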
452 static int build_isgl(__be64 *queue_start, __be64 *queue_end,
453 		      struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
454 		      int num_sge, u32 *plenp)
455 
456 {
457 	int i;
458 	u32 plen = 0;
459 	__be64 *flitp;
460 
461 	if ((__be64 *)isglp == queue_end)
462 		isglp = (struct fw_ri_isgl *)queue_start;
463 
464 	flitp = (__be64 *)isglp->sge;
465 
466 	for (i = 0; i < num_sge; i++) {
467 		if ((plen + sg_list[i].length) < plen)
468 			return -EMSGSIZE;
469 		plen += sg_list[i].length;
470 		*flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
471 				     sg_list[i].length);
472 		if (++flitp == queue_end)
473 			flitp = queue_start;
474 		*flitp = cpu_to_be64(sg_list[i].addr);
475 		if (++flitp == queue_end)
476 			flitp = queue_start;
477 	}
478 	*flitp = (__force __be64)0;
479 	isglp->op = FW_RI_DATA_ISGL;
480 	isglp->r1 = 0;
481 	isglp->nsge = cpu_to_be16(num_sge);
482 	isglp->r2 = 0;
483 	if (plenp)
484 		*plenp = plen;
485 	return 0;
486 }
487 
488 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
489 			   const struct ib_send_wr *wr, u8 *len16)
490 {
491 	u32 plen;
492 	int size;
493 	int ret;
494 
495 	if (wr->num_sge > T4_MAX_SEND_SGE)
496 		return -EINVAL;
497 	switch (wr->opcode) {
498 	case IB_WR_SEND:
499 		if (wr->send_flags & IB_SEND_SOLICITED)
500 			wqe->send.sendop_pkd = cpu_to_be32(
501 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
502 		else
503 			wqe->send.sendop_pkd = cpu_to_be32(
504 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
505 		wqe->send.stag_inv = 0;
506 		break;
507 	case IB_WR_SEND_WITH_INV:
508 		if (wr->send_flags & IB_SEND_SOLICITED)
509 			wqe->send.sendop_pkd = cpu_to_be32(
510 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV));
511 		else
512 			wqe->send.sendop_pkd = cpu_to_be32(
513 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV));
514 		wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
515 		break;
516 
517 	default:
518 		return -EINVAL;
519 	}
520 	wqe->send.r3 = 0;
521 	wqe->send.r4 = 0;
522 
523 	plen = 0;
524 	if (wr->num_sge) {
525 		if (wr->send_flags & IB_SEND_INLINE) {
526 			ret = build_immd(sq, wqe->send.u.immd_src, wr,
527 					 T4_MAX_SEND_INLINE, &plen);
528 			if (ret)
529 				return ret;
530 			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
531 			       plen;
532 		} else {
533 			ret = build_isgl((__be64 *)sq->queue,
534 					 (__be64 *)&sq->queue[sq->size],
535 					 wqe->send.u.isgl_src,
536 					 wr->sg_list, wr->num_sge, &plen);
537 			if (ret)
538 				return ret;
539 			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
540 			       wr->num_sge * sizeof(struct fw_ri_sge);
541 		}
542 	} else {
543 		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
544 		wqe->send.u.immd_src[0].r1 = 0;
545 		wqe->send.u.immd_src[0].r2 = 0;
546 		wqe->send.u.immd_src[0].immdlen = 0;
547 		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
548 		plen = 0;
549 	}
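	/* Firmware work request lengths are expressed in 16-byte units. */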
550 	*len16 = DIV_ROUND_UP(size, 16);
551 	wqe->send.plen = cpu_to_be32(plen);
552 	return 0;
553 }
554 
555 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
556 			    const struct ib_send_wr *wr, u8 *len16)
557 {
558 	u32 plen;
559 	int size;
560 	int ret;
561 
562 	if (wr->num_sge > T4_MAX_SEND_SGE)
563 		return -EINVAL;
564 
565 	/*
566 	 * The iWARP protocol supports 64-bit immediate data, but the RDMA API
567 	 * limits it to 32 bits.
568 	 */
569 	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
570 		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
571 	else
572 		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
573 	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
574 	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
575 	if (wr->num_sge) {
576 		if (wr->send_flags & IB_SEND_INLINE) {
577 			ret = build_immd(sq, wqe->write.u.immd_src, wr,
578 					 T4_MAX_WRITE_INLINE, &plen);
579 			if (ret)
580 				return ret;
581 			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
582 			       plen;
583 		} else {
584 			ret = build_isgl((__be64 *)sq->queue,
585 					 (__be64 *)&sq->queue[sq->size],
586 					 wqe->write.u.isgl_src,
587 					 wr->sg_list, wr->num_sge, &plen);
588 			if (ret)
589 				return ret;
590 			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
591 			       wr->num_sge * sizeof(struct fw_ri_sge);
592 		}
593 	} else {
594 		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
595 		wqe->write.u.immd_src[0].r1 = 0;
596 		wqe->write.u.immd_src[0].r2 = 0;
597 		wqe->write.u.immd_src[0].immdlen = 0;
598 		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
599 		plen = 0;
600 	}
601 	*len16 = DIV_ROUND_UP(size, 16);
602 	wqe->write.plen = cpu_to_be32(plen);
603 	return 0;
604 }
605 
606 static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
607 			    struct ib_send_wr *wr)
608 {
609 	memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
610 	memset(immdp->r1, 0, 6);
611 	immdp->op = FW_RI_DATA_IMMD;
612 	immdp->immdlen = 16;
613 }
614 
615 static void build_rdma_write_cmpl(struct t4_sq *sq,
616 				  struct fw_ri_rdma_write_cmpl_wr *wcwr,
617 				  const struct ib_send_wr *wr, u8 *len16)
618 {
619 	u32 plen;
620 	int size;
621 
622 	/*
623 	 * This code assumes the struct fields preceding the write isgl
624 	 * fit in one 64B WR slot.  This is because the WQE is built
625 	 * directly in the dma queue, and wrapping is only handled
626 	 * by the code building sgls, i.e. the "fixed part" of the wr
627 	 * structs must all fit in 64B.  The WQE build code should probably be
628 	 * redesigned to avoid this restriction, but for now just add
629 	 * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
630 	 */
631 	BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
632 
633 	wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
634 	wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
635 	wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
636 	wcwr->r2 = 0;
637 	wcwr->r3 = 0;
638 
639 	/* SEND_INV SGL */
640 	if (wr->next->send_flags & IB_SEND_INLINE)
641 		build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
642 	else
643 		build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
644 			   &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
645 
646 	/* WRITE SGL */
647 	build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
648 		   wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
649 
650 	size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
651 		wr->num_sge * sizeof(struct fw_ri_sge);
652 	wcwr->plen = cpu_to_be32(plen);
653 	*len16 = DIV_ROUND_UP(size, 16);
654 }
655 
656 static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
657 			   u8 *len16)
658 {
659 	if (wr->num_sge > 1)
660 		return -EINVAL;
661 	if (wr->num_sge && wr->sg_list[0].length) {
662 		wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
663 		wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
664 							>> 32));
665 		wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
666 		wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
667 		wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
668 		wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
669 							 >> 32));
670 		wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr));
671 	} else {
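		/*
		 * Zero-length READ: nothing is transferred, so program
		 * placeholder STags and zero addresses/length.
		 */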
672 		wqe->read.stag_src = cpu_to_be32(2);
673 		wqe->read.to_src_hi = 0;
674 		wqe->read.to_src_lo = 0;
675 		wqe->read.stag_sink = cpu_to_be32(2);
676 		wqe->read.plen = 0;
677 		wqe->read.to_sink_hi = 0;
678 		wqe->read.to_sink_lo = 0;
679 	}
680 	wqe->read.r2 = 0;
681 	wqe->read.r5 = 0;
682 	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
683 	return 0;
684 }
685 
686 static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
687 {
688 	bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
689 			     qhp->sq_sig_all;
690 	bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
691 			      qhp->sq_sig_all;
692 	struct t4_swsqe *swsqe;
693 	union t4_wr *wqe;
694 	u16 write_wrid;
695 	u8 len16;
696 	u16 idx;
697 
698 	/*
699 	 * The sw_sq entries still look like a WRITE and a SEND and consume
700 	 * 2 slots. The FW WR, however, will be a single uber-WR.
701 	 */
702 	wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
703 	       qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
704 	build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
705 
706 	/* WRITE swsqe */
707 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
708 	swsqe->opcode = FW_RI_RDMA_WRITE;
709 	swsqe->idx = qhp->wq.sq.pidx;
710 	swsqe->complete = 0;
711 	swsqe->signaled = write_signaled;
712 	swsqe->flushed = 0;
713 	swsqe->wr_id = wr->wr_id;
714 	if (c4iw_wr_log) {
715 		swsqe->sge_ts =
716 			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
717 		swsqe->host_time = ktime_get();
718 	}
719 
720 	write_wrid = qhp->wq.sq.pidx;
721 
722 	/* just bump the sw_sq */
723 	qhp->wq.sq.in_use++;
724 	if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
725 		qhp->wq.sq.pidx = 0;
726 
727 	/* SEND_WITH_INV swsqe */
728 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
729 	swsqe->opcode = FW_RI_SEND_WITH_INV;
730 	swsqe->idx = qhp->wq.sq.pidx;
731 	swsqe->complete = 0;
732 	swsqe->signaled = send_signaled;
733 	swsqe->flushed = 0;
734 	swsqe->wr_id = wr->next->wr_id;
735 	if (c4iw_wr_log) {
736 		swsqe->sge_ts =
737 			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
738 		swsqe->host_time = ktime_get();
739 	}
740 
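	/*
	 * The SEND half of the uber-WR is described by flags_send/wrid_send;
	 * the WRITE half's wrid and completion flag go in the WR header
	 * built below.
	 */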
741 	wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
742 	wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
743 
744 	init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
745 		    write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
746 	t4_sq_produce(&qhp->wq, len16);
747 	idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
748 
749 	t4_ring_sq_db(&qhp->wq, idx, wqe);
750 }
751 
752 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
753 			   const struct ib_recv_wr *wr, u8 *len16)
754 {
755 	int ret;
756 
757 	ret = build_isgl((__be64 *)qhp->wq.rq.queue,
758 			 (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
759 			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
760 	if (ret)
761 		return ret;
762 	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
763 			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
764 	return 0;
765 }
766 
767 static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
768 			  u8 *len16)
769 {
770 	int ret;
771 
772 	ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
773 			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
774 	if (ret)
775 		return ret;
776 	*len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
777 			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
778 	return 0;
779 }
780 
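/*
 * Build a FW_RI_FR_NSMR_TPTE_WR: the TPT entry and up to two PBL entries are
 * carried directly in the work request.  Only used when the device supports
 * it and the MR's page list fits (mpl_len <= 2) -- see c4iw_post_send().
 */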
781 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
782 			      const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
783 			      u8 *len16)
784 {
785 	__be64 *p = (__be64 *)fr->pbl;
786 
787 	fr->r2 = cpu_to_be32(0);
788 	fr->stag = cpu_to_be32(mhp->ibmr.rkey);
789 
790 	fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
791 		FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) |
792 		FW_RI_TPTE_STAGSTATE_V(1) |
793 		FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) |
794 		FW_RI_TPTE_PDID_V(mhp->attr.pdid));
795 	fr->tpte.locread_to_qpid = cpu_to_be32(
796 		FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) |
797 		FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) |
798 		FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12));
799 	fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V(
800 		PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3));
801 	fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0);
802 	fr->tpte.len_hi = cpu_to_be32(0);
803 	fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length);
804 	fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
805 	fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
806 
807 	p[0] = cpu_to_be64((u64)mhp->mpl[0]);
808 	p[1] = cpu_to_be64((u64)mhp->mpl[1]);
809 
810 	*len16 = DIV_ROUND_UP(sizeof(*fr), 16);
811 }
812 
813 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
814 			const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
815 			u8 *len16, bool dsgl_supported)
816 {
817 	struct fw_ri_immd *imdp;
818 	__be64 *p;
819 	int i;
820 	int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
821 	int rem;
822 
823 	if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
824 		return -EINVAL;
825 
826 	wqe->fr.qpbinde_to_dcacpu = 0;
827 	wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
828 	wqe->fr.addr_type = FW_RI_VA_BASED_TO;
829 	wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
830 	wqe->fr.len_hi = 0;
831 	wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
832 	wqe->fr.stag = cpu_to_be32(wr->key);
833 	wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
834 	wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
835 					0xffffffff);
836 
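	/*
	 * Hand large page lists to hardware via a DSGL when the device
	 * supports it; otherwise copy the PBL inline as immediate data,
	 * wrapping around the SQ as needed.
	 */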
837 	if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
838 		struct fw_ri_dsgl *sglp;
839 
840 		for (i = 0; i < mhp->mpl_len; i++)
841 			mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
842 
843 		sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
844 		sglp->op = FW_RI_DATA_DSGL;
845 		sglp->r1 = 0;
846 		sglp->nsge = cpu_to_be16(1);
847 		sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
848 		sglp->len0 = cpu_to_be32(pbllen);
849 
850 		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
851 	} else {
852 		imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
853 		imdp->op = FW_RI_DATA_IMMD;
854 		imdp->r1 = 0;
855 		imdp->r2 = 0;
856 		imdp->immdlen = cpu_to_be32(pbllen);
857 		p = (__be64 *)(imdp + 1);
858 		rem = pbllen;
859 		for (i = 0; i < mhp->mpl_len; i++) {
860 			*p = cpu_to_be64((u64)mhp->mpl[i]);
861 			rem -= sizeof(*p);
862 			if (++p == (__be64 *)&sq->queue[sq->size])
863 				p = (__be64 *)sq->queue;
864 		}
865 		while (rem) {
866 			*p = 0;
867 			rem -= sizeof(*p);
868 			if (++p == (__be64 *)&sq->queue[sq->size])
869 				p = (__be64 *)sq->queue;
870 		}
871 		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
872 				      + pbllen, 16);
873 	}
874 	return 0;
875 }
876 
877 static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
878 			  u8 *len16)
879 {
880 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
881 	wqe->inv.r2 = 0;
882 	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
883 	return 0;
884 }
885 
886 static void free_qp_work(struct work_struct *work)
887 {
888 	struct c4iw_ucontext *ucontext;
889 	struct c4iw_qp *qhp;
890 	struct c4iw_dev *rhp;
891 
892 	qhp = container_of(work, struct c4iw_qp, free_work);
893 	ucontext = qhp->ucontext;
894 	rhp = qhp->rhp;
895 
896 	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
897 	destroy_qp(&rhp->rdev, &qhp->wq,
898 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
899 
900 	if (ucontext)
901 		c4iw_put_ucontext(ucontext);
902 	c4iw_put_wr_wait(qhp->wr_waitp);
903 	kfree(qhp);
904 }
905 
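/*
 * Last reference dropped: defer the teardown to the rdev free workqueue so
 * it runs in process context regardless of where the final kref_put()
 * happened.
 */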
906 static void queue_qp_free(struct kref *kref)
907 {
908 	struct c4iw_qp *qhp;
909 
910 	qhp = container_of(kref, struct c4iw_qp, kref);
911 	pr_debug("qhp %p\n", qhp);
912 	queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
913 }
914 
915 void c4iw_qp_add_ref(struct ib_qp *qp)
916 {
917 	pr_debug("ib_qp %p\n", qp);
918 	kref_get(&to_c4iw_qp(qp)->kref);
919 }
920 
921 void c4iw_qp_rem_ref(struct ib_qp *qp)
922 {
923 	pr_debug("ib_qp %p\n", qp);
924 	kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
925 }
926 
927 static void add_to_fc_list(struct list_head *head, struct list_head *entry)
928 {
929 	if (list_empty(entry))
930 		list_add_tail(entry, head);
931 }
932 
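/*
 * Ring the kernel SQ doorbell.  If the device's doorbell state is not
 * NORMAL, defer the ring: queue the QP on the db_fc_list and accumulate the
 * pidx increment so it can be applied once the db state returns to NORMAL.
 */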
933 static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
934 {
935 	unsigned long flags;
936 
937 	spin_lock_irqsave(&qhp->rhp->lock, flags);
938 	spin_lock(&qhp->lock);
939 	if (qhp->rhp->db_state == NORMAL)
940 		t4_ring_sq_db(&qhp->wq, inc, NULL);
941 	else {
942 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
943 		qhp->wq.sq.wq_pidx_inc += inc;
944 	}
945 	spin_unlock(&qhp->lock);
946 	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
947 	return 0;
948 }
949 
950 static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
951 {
952 	unsigned long flags;
953 
954 	spin_lock_irqsave(&qhp->rhp->lock, flags);
955 	spin_lock(&qhp->lock);
956 	if (qhp->rhp->db_state == NORMAL)
957 		t4_ring_rq_db(&qhp->wq, inc, NULL);
958 	else {
959 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
960 		qhp->wq.rq.wq_pidx_inc += inc;
961 	}
962 	spin_unlock(&qhp->lock);
963 	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
964 	return 0;
965 }
966 
967 static int ib_to_fw_opcode(int ib_opcode)
968 {
969 	int opcode;
970 
971 	switch (ib_opcode) {
972 	case IB_WR_SEND_WITH_INV:
973 		opcode = FW_RI_SEND_WITH_INV;
974 		break;
975 	case IB_WR_SEND:
976 		opcode = FW_RI_SEND;
977 		break;
978 	case IB_WR_RDMA_WRITE:
979 		opcode = FW_RI_RDMA_WRITE;
980 		break;
981 	case IB_WR_RDMA_WRITE_WITH_IMM:
982 		opcode = FW_RI_WRITE_IMMEDIATE;
983 		break;
984 	case IB_WR_RDMA_READ:
985 	case IB_WR_RDMA_READ_WITH_INV:
986 		opcode = FW_RI_READ_REQ;
987 		break;
988 	case IB_WR_REG_MR:
989 		opcode = FW_RI_FAST_REGISTER;
990 		break;
991 	case IB_WR_LOCAL_INV:
992 		opcode = FW_RI_LOCAL_INV;
993 		break;
994 	default:
995 		opcode = -EINVAL;
996 	}
997 	return opcode;
998 }
999 
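/*
 * A WR posted to an already-flushed QP never reaches hardware; fabricate a
 * software CQE (SWCQE/DRAIN with T4_ERR_SWFLUSH status) so the consumer
 * still sees a flush completion for it.
 */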
1000 static int complete_sq_drain_wr(struct c4iw_qp *qhp,
1001 				const struct ib_send_wr *wr)
1002 {
1003 	struct t4_cqe cqe = {};
1004 	struct c4iw_cq *schp;
1005 	unsigned long flag;
1006 	struct t4_cq *cq;
1007 	int opcode;
1008 
1009 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
1010 	cq = &schp->cq;
1011 
1012 	opcode = ib_to_fw_opcode(wr->opcode);
1013 	if (opcode < 0)
1014 		return opcode;
1015 
1016 	cqe.u.drain_cookie = wr->wr_id;
1017 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
1018 				 CQE_OPCODE_V(opcode) |
1019 				 CQE_TYPE_V(1) |
1020 				 CQE_SWCQE_V(1) |
1021 				 CQE_DRAIN_V(1) |
1022 				 CQE_QPID_V(qhp->wq.sq.qid));
1023 
1024 	spin_lock_irqsave(&schp->lock, flag);
1025 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1026 	cq->sw_queue[cq->sw_pidx] = cqe;
1027 	t4_swcq_produce(cq);
1028 	spin_unlock_irqrestore(&schp->lock, flag);
1029 
1030 	if (t4_clear_cq_armed(&schp->cq)) {
1031 		spin_lock_irqsave(&schp->comp_handler_lock, flag);
1032 		(*schp->ibcq.comp_handler)(&schp->ibcq,
1033 					   schp->ibcq.cq_context);
1034 		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1035 	}
1036 	return 0;
1037 }
1038 
1039 static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
1040 				 const struct ib_send_wr *wr,
1041 				 const struct ib_send_wr **bad_wr)
1042 {
1043 	int ret = 0;
1044 
1045 	while (wr) {
1046 		ret = complete_sq_drain_wr(qhp, wr);
1047 		if (ret) {
1048 			*bad_wr = wr;
1049 			break;
1050 		}
1051 		wr = wr->next;
1052 	}
1053 	return ret;
1054 }
1055 
1056 static void complete_rq_drain_wr(struct c4iw_qp *qhp,
1057 				 const struct ib_recv_wr *wr)
1058 {
1059 	struct t4_cqe cqe = {};
1060 	struct c4iw_cq *rchp;
1061 	unsigned long flag;
1062 	struct t4_cq *cq;
1063 
1064 	rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1065 	cq = &rchp->cq;
1066 
1067 	cqe.u.drain_cookie = wr->wr_id;
1068 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
1069 				 CQE_OPCODE_V(FW_RI_SEND) |
1070 				 CQE_TYPE_V(0) |
1071 				 CQE_SWCQE_V(1) |
1072 				 CQE_DRAIN_V(1) |
1073 				 CQE_QPID_V(qhp->wq.sq.qid));
1074 
1075 	spin_lock_irqsave(&rchp->lock, flag);
1076 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1077 	cq->sw_queue[cq->sw_pidx] = cqe;
1078 	t4_swcq_produce(cq);
1079 	spin_unlock_irqrestore(&rchp->lock, flag);
1080 
1081 	if (t4_clear_cq_armed(&rchp->cq)) {
1082 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1083 		(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1084 					   rchp->ibcq.cq_context);
1085 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1086 	}
1087 }
1088 
1089 static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
1090 				  const struct ib_recv_wr *wr)
1091 {
1092 	while (wr) {
1093 		complete_rq_drain_wr(qhp, wr);
1094 		wr = wr->next;
1095 	}
1096 }
1097 
1098 int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1099 		   const struct ib_send_wr **bad_wr)
1100 {
1101 	int err = 0;
1102 	u8 len16 = 0;
1103 	enum fw_wr_opcodes fw_opcode = 0;
1104 	enum fw_ri_wr_flags fw_flags;
1105 	struct c4iw_qp *qhp;
1106 	struct c4iw_dev *rhp;
1107 	union t4_wr *wqe = NULL;
1108 	u32 num_wrs;
1109 	struct t4_swsqe *swsqe;
1110 	unsigned long flag;
1111 	u16 idx = 0;
1112 
1113 	qhp = to_c4iw_qp(ibqp);
1114 	rhp = qhp->rhp;
1115 	spin_lock_irqsave(&qhp->lock, flag);
1116 
1117 	/*
1118 	 * If the qp has been flushed, then just insert a special
1119 	 * drain cqe.
1120 	 */
1121 	if (qhp->wq.flushed) {
1122 		spin_unlock_irqrestore(&qhp->lock, flag);
1123 		err = complete_sq_drain_wrs(qhp, wr, bad_wr);
1124 		return err;
1125 	}
1126 	num_wrs = t4_sq_avail(&qhp->wq);
1127 	if (num_wrs == 0) {
1128 		spin_unlock_irqrestore(&qhp->lock, flag);
1129 		*bad_wr = wr;
1130 		return -ENOMEM;
1131 	}
1132 
1133 	/*
1134 	 * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
1135 	 * the response for small NVMe-oF READ requests.  If the chain is
1136 	 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
1137 	 * meet the requirements of the fw_ri_write_cmpl_wr work request,
1138 	 * then build and post the write_cmpl WR.  If any of the tests
1139 	 * below are not true, then we continue on with the traditional WRITE
1140 	 * and SEND WRs.
1141 	 */
1142 	if (qhp->rhp->rdev.lldi.write_cmpl_support &&
1143 	    CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
1144 	    CHELSIO_T5 &&
1145 	    wr && wr->next && !wr->next->next &&
1146 	    wr->opcode == IB_WR_RDMA_WRITE &&
1147 	    wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
1148 	    wr->next->opcode == IB_WR_SEND_WITH_INV &&
1149 	    wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
1150 	    wr->next->num_sge == 1 && num_wrs >= 2) {
1151 		post_write_cmpl(qhp, wr);
1152 		spin_unlock_irqrestore(&qhp->lock, flag);
1153 		return 0;
1154 	}
1155 
1156 	while (wr) {
1157 		if (num_wrs == 0) {
1158 			err = -ENOMEM;
1159 			*bad_wr = wr;
1160 			break;
1161 		}
1162 		wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
1163 		      qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
1164 
1165 		fw_flags = 0;
1166 		if (wr->send_flags & IB_SEND_SOLICITED)
1167 			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
1168 		if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
1169 			fw_flags |= FW_RI_COMPLETION_FLAG;
1170 		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
1171 		switch (wr->opcode) {
1172 		case IB_WR_SEND_WITH_INV:
1173 		case IB_WR_SEND:
1174 			if (wr->send_flags & IB_SEND_FENCE)
1175 				fw_flags |= FW_RI_READ_FENCE_FLAG;
1176 			fw_opcode = FW_RI_SEND_WR;
1177 			if (wr->opcode == IB_WR_SEND)
1178 				swsqe->opcode = FW_RI_SEND;
1179 			else
1180 				swsqe->opcode = FW_RI_SEND_WITH_INV;
1181 			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
1182 			break;
1183 		case IB_WR_RDMA_WRITE_WITH_IMM:
1184 			if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
1185 				err = -EINVAL;
1186 				break;
1187 			}
1188 			fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
1189 			/*FALLTHROUGH*/
1190 		case IB_WR_RDMA_WRITE:
1191 			fw_opcode = FW_RI_RDMA_WRITE_WR;
1192 			swsqe->opcode = FW_RI_RDMA_WRITE;
1193 			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
1194 			break;
1195 		case IB_WR_RDMA_READ:
1196 		case IB_WR_RDMA_READ_WITH_INV:
1197 			fw_opcode = FW_RI_RDMA_READ_WR;
1198 			swsqe->opcode = FW_RI_READ_REQ;
1199 			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
1200 				c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
1201 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
1202 			} else {
1203 				fw_flags = 0;
1204 			}
1205 			err = build_rdma_read(wqe, wr, &len16);
1206 			if (err)
1207 				break;
1208 			swsqe->read_len = wr->sg_list[0].length;
1209 			if (!qhp->wq.sq.oldest_read)
1210 				qhp->wq.sq.oldest_read = swsqe;
1211 			break;
1212 		case IB_WR_REG_MR: {
1213 			struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
1214 
1215 			swsqe->opcode = FW_RI_FAST_REGISTER;
1216 			if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
1217 			    !mhp->attr.state && mhp->mpl_len <= 2) {
1218 				fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
1219 				build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
1220 						  mhp, &len16);
1221 			} else {
1222 				fw_opcode = FW_RI_FR_NSMR_WR;
1223 				err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
1224 				       mhp, &len16,
1225 				       rhp->rdev.lldi.ulptx_memwrite_dsgl);
1226 				if (err)
1227 					break;
1228 			}
1229 			mhp->attr.state = 1;
1230 			break;
1231 		}
1232 		case IB_WR_LOCAL_INV:
1233 			if (wr->send_flags & IB_SEND_FENCE)
1234 				fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
1235 			fw_opcode = FW_RI_INV_LSTAG_WR;
1236 			swsqe->opcode = FW_RI_LOCAL_INV;
1237 			err = build_inv_stag(wqe, wr, &len16);
1238 			c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
1239 			break;
1240 		default:
1241 			pr_warn("%s post of type=%d TBD!\n", __func__,
1242 				wr->opcode);
1243 			err = -EINVAL;
1244 		}
1245 		if (err) {
1246 			*bad_wr = wr;
1247 			break;
1248 		}
1249 		swsqe->idx = qhp->wq.sq.pidx;
1250 		swsqe->complete = 0;
1251 		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
1252 				  qhp->sq_sig_all;
1253 		swsqe->flushed = 0;
1254 		swsqe->wr_id = wr->wr_id;
1255 		if (c4iw_wr_log) {
1256 			swsqe->sge_ts = cxgb4_read_sge_timestamp(
1257 					rhp->rdev.lldi.ports[0]);
1258 			swsqe->host_time = ktime_get();
1259 		}
1260 
1261 		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
1262 
1263 		pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
1264 			 (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
1265 			 swsqe->opcode, swsqe->read_len);
1266 		wr = wr->next;
1267 		num_wrs--;
1268 		t4_sq_produce(&qhp->wq, len16);
1269 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1270 	}
1271 	if (!rhp->rdev.status_page->db_off) {
1272 		t4_ring_sq_db(&qhp->wq, idx, wqe);
1273 		spin_unlock_irqrestore(&qhp->lock, flag);
1274 	} else {
1275 		spin_unlock_irqrestore(&qhp->lock, flag);
1276 		ring_kernel_sq_db(qhp, idx);
1277 	}
1278 	return err;
1279 }
1280 
1281 int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1282 		      const struct ib_recv_wr **bad_wr)
1283 {
1284 	int err = 0;
1285 	struct c4iw_qp *qhp;
1286 	union t4_recv_wr *wqe = NULL;
1287 	u32 num_wrs;
1288 	u8 len16 = 0;
1289 	unsigned long flag;
1290 	u16 idx = 0;
1291 
1292 	qhp = to_c4iw_qp(ibqp);
1293 	spin_lock_irqsave(&qhp->lock, flag);
1294 
1295 	/*
1296 	 * If the qp has been flushed, then just insert a special
1297 	 * drain cqe.
1298 	 */
1299 	if (qhp->wq.flushed) {
1300 		spin_unlock_irqrestore(&qhp->lock, flag);
1301 		complete_rq_drain_wrs(qhp, wr);
1302 		return err;
1303 	}
1304 	num_wrs = t4_rq_avail(&qhp->wq);
1305 	if (num_wrs == 0) {
1306 		spin_unlock_irqrestore(&qhp->lock, flag);
1307 		*bad_wr = wr;
1308 		return -ENOMEM;
1309 	}
1310 	while (wr) {
1311 		if (wr->num_sge > T4_MAX_RECV_SGE) {
1312 			err = -EINVAL;
1313 			*bad_wr = wr;
1314 			break;
1315 		}
1316 		wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
1317 					   qhp->wq.rq.wq_pidx *
1318 					   T4_EQ_ENTRY_SIZE);
1319 		if (num_wrs)
1320 			err = build_rdma_recv(qhp, wqe, wr, &len16);
1321 		else
1322 			err = -ENOMEM;
1323 		if (err) {
1324 			*bad_wr = wr;
1325 			break;
1326 		}
1327 
1328 		qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
1329 		if (c4iw_wr_log) {
1330 			qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
1331 				cxgb4_read_sge_timestamp(
1332 						qhp->rhp->rdev.lldi.ports[0]);
1333 			qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
1334 				ktime_get();
1335 		}
1336 
1337 		wqe->recv.opcode = FW_RI_RECV_WR;
1338 		wqe->recv.r1 = 0;
1339 		wqe->recv.wrid = qhp->wq.rq.pidx;
1340 		wqe->recv.r2[0] = 0;
1341 		wqe->recv.r2[1] = 0;
1342 		wqe->recv.r2[2] = 0;
1343 		wqe->recv.len16 = len16;
1344 		pr_debug("cookie 0x%llx pidx %u\n",
1345 			 (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
1346 		t4_rq_produce(&qhp->wq, len16);
1347 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1348 		wr = wr->next;
1349 		num_wrs--;
1350 	}
1351 	if (!qhp->rhp->rdev.status_page->db_off) {
1352 		t4_ring_rq_db(&qhp->wq, idx, wqe);
1353 		spin_unlock_irqrestore(&qhp->lock, flag);
1354 	} else {
1355 		spin_unlock_irqrestore(&qhp->lock, flag);
1356 		ring_kernel_rq_db(qhp, idx);
1357 	}
1358 	return err;
1359 }
1360 
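/*
 * The SRQ slot at pidx is not yet free (out-of-order completions or a
 * still-valid entry ahead of us), so stash the WR in the pending ring to be
 * copied into the SRQ once the queue catches up.
 */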
1361 static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
1362 			 u64 wr_id, u8 len16)
1363 {
1364 	struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
1365 
1366 	pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
1367 		 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
1368 		 srq->in_use, srq->ooo_count,
1369 		 (unsigned long long)wr_id, srq->pending_cidx,
1370 		 srq->pending_pidx, srq->pending_in_use);
1371 	pwr->wr_id = wr_id;
1372 	pwr->len16 = len16;
1373 	memcpy(&pwr->wqe, wqe, len16 * 16);
1374 	t4_srq_produce_pending_wr(srq);
1375 }
1376 
1377 int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1378 		       const struct ib_recv_wr **bad_wr)
1379 {
1380 	union t4_recv_wr *wqe, lwqe;
1381 	struct c4iw_srq *srq;
1382 	unsigned long flag;
1383 	u8 len16 = 0;
1384 	u16 idx = 0;
1385 	int err = 0;
1386 	u32 num_wrs;
1387 
1388 	srq = to_c4iw_srq(ibsrq);
1389 	spin_lock_irqsave(&srq->lock, flag);
1390 	num_wrs = t4_srq_avail(&srq->wq);
1391 	if (num_wrs == 0) {
1392 		spin_unlock_irqrestore(&srq->lock, flag);
1393 		return -ENOMEM;
1394 	}
1395 	while (wr) {
1396 		if (wr->num_sge > T4_MAX_RECV_SGE) {
1397 			err = -EINVAL;
1398 			*bad_wr = wr;
1399 			break;
1400 		}
1401 		wqe = &lwqe;
1402 		if (num_wrs)
1403 			err = build_srq_recv(wqe, wr, &len16);
1404 		else
1405 			err = -ENOMEM;
1406 		if (err) {
1407 			*bad_wr = wr;
1408 			break;
1409 		}
1410 
1411 		wqe->recv.opcode = FW_RI_RECV_WR;
1412 		wqe->recv.r1 = 0;
1413 		wqe->recv.wrid = srq->wq.pidx;
1414 		wqe->recv.r2[0] = 0;
1415 		wqe->recv.r2[1] = 0;
1416 		wqe->recv.r2[2] = 0;
1417 		wqe->recv.len16 = len16;
1418 
1419 		if (srq->wq.ooo_count ||
1420 		    srq->wq.pending_in_use ||
1421 		    srq->wq.sw_rq[srq->wq.pidx].valid) {
1422 			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
1423 		} else {
1424 			srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
1425 			srq->wq.sw_rq[srq->wq.pidx].valid = 1;
1426 			c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
1427 			pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
1428 				 __func__, srq->wq.cidx,
1429 				 srq->wq.pidx, srq->wq.wq_pidx,
1430 				 srq->wq.in_use,
1431 				 (unsigned long long)wr->wr_id);
1432 			t4_srq_produce(&srq->wq, len16);
1433 			idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
1434 		}
1435 		wr = wr->next;
1436 		num_wrs--;
1437 	}
1438 	if (idx)
1439 		t4_ring_srq_db(&srq->wq, idx, len16, wqe);
1440 	spin_unlock_irqrestore(&srq->lock, flag);
1441 	return err;
1442 }
1443 
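/*
 * Translate a CQE error status into the iWARP TERMINATE layer/etype and
 * error code fields carried in the MPA TERMINATE message.
 */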
1444 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
1445 				    u8 *ecode)
1446 {
1447 	int status;
1448 	int tagged;
1449 	int opcode;
1450 	int rqtype;
1451 	int send_inv;
1452 
1453 	if (!err_cqe) {
1454 		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1455 		*ecode = 0;
1456 		return;
1457 	}
1458 
1459 	status = CQE_STATUS(err_cqe);
1460 	opcode = CQE_OPCODE(err_cqe);
1461 	rqtype = RQ_TYPE(err_cqe);
1462 	send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
1463 		   (opcode == FW_RI_SEND_WITH_SE_INV);
1464 	tagged = (opcode == FW_RI_RDMA_WRITE) ||
1465 		 (rqtype && (opcode == FW_RI_READ_RESP));
1466 
1467 	switch (status) {
1468 	case T4_ERR_STAG:
1469 		if (send_inv) {
1470 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1471 			*ecode = RDMAP_CANT_INV_STAG;
1472 		} else {
1473 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1474 			*ecode = RDMAP_INV_STAG;
1475 		}
1476 		break;
1477 	case T4_ERR_PDID:
1478 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1479 		if ((opcode == FW_RI_SEND_WITH_INV) ||
1480 		    (opcode == FW_RI_SEND_WITH_SE_INV))
1481 			*ecode = RDMAP_CANT_INV_STAG;
1482 		else
1483 			*ecode = RDMAP_STAG_NOT_ASSOC;
1484 		break;
1485 	case T4_ERR_QPID:
1486 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1487 		*ecode = RDMAP_STAG_NOT_ASSOC;
1488 		break;
1489 	case T4_ERR_ACCESS:
1490 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1491 		*ecode = RDMAP_ACC_VIOL;
1492 		break;
1493 	case T4_ERR_WRAP:
1494 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1495 		*ecode = RDMAP_TO_WRAP;
1496 		break;
1497 	case T4_ERR_BOUND:
1498 		if (tagged) {
1499 			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1500 			*ecode = DDPT_BASE_BOUNDS;
1501 		} else {
1502 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1503 			*ecode = RDMAP_BASE_BOUNDS;
1504 		}
1505 		break;
1506 	case T4_ERR_INVALIDATE_SHARED_MR:
1507 	case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
1508 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1509 		*ecode = RDMAP_CANT_INV_STAG;
1510 		break;
1511 	case T4_ERR_ECC:
1512 	case T4_ERR_ECC_PSTAG:
1513 	case T4_ERR_INTERNAL_ERR:
1514 		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
1515 		*ecode = 0;
1516 		break;
1517 	case T4_ERR_OUT_OF_RQE:
1518 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1519 		*ecode = DDPU_INV_MSN_NOBUF;
1520 		break;
1521 	case T4_ERR_PBL_ADDR_BOUND:
1522 		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1523 		*ecode = DDPT_BASE_BOUNDS;
1524 		break;
1525 	case T4_ERR_CRC:
1526 		*layer_type = LAYER_MPA|DDP_LLP;
1527 		*ecode = MPA_CRC_ERR;
1528 		break;
1529 	case T4_ERR_MARKER:
1530 		*layer_type = LAYER_MPA|DDP_LLP;
1531 		*ecode = MPA_MARKER_ERR;
1532 		break;
1533 	case T4_ERR_PDU_LEN_ERR:
1534 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1535 		*ecode = DDPU_MSG_TOOBIG;
1536 		break;
1537 	case T4_ERR_DDP_VERSION:
1538 		if (tagged) {
1539 			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1540 			*ecode = DDPT_INV_VERS;
1541 		} else {
1542 			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1543 			*ecode = DDPU_INV_VERS;
1544 		}
1545 		break;
1546 	case T4_ERR_RDMA_VERSION:
1547 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1548 		*ecode = RDMAP_INV_VERS;
1549 		break;
1550 	case T4_ERR_OPCODE:
1551 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1552 		*ecode = RDMAP_INV_OPCODE;
1553 		break;
1554 	case T4_ERR_DDP_QUEUE_NUM:
1555 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1556 		*ecode = DDPU_INV_QN;
1557 		break;
1558 	case T4_ERR_MSN:
1559 	case T4_ERR_MSN_GAP:
1560 	case T4_ERR_MSN_RANGE:
1561 	case T4_ERR_IRD_OVERFLOW:
1562 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1563 		*ecode = DDPU_INV_MSN_RANGE;
1564 		break;
1565 	case T4_ERR_TBIT:
1566 		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
1567 		*ecode = 0;
1568 		break;
1569 	case T4_ERR_MO:
1570 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1571 		*ecode = DDPU_INV_MO;
1572 		break;
1573 	default:
1574 		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1575 		*ecode = 0;
1576 		break;
1577 	}
1578 }
1579 
1580 static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
1581 			   gfp_t gfp)
1582 {
1583 	struct fw_ri_wr *wqe;
1584 	struct sk_buff *skb;
1585 	struct terminate_message *term;
1586 
1587 	pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid,
1588 		 qhp->ep->hwtid);
1589 
1590 	skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
1591 	if (WARN_ON(!skb))
1592 		return;
1593 
1594 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1595 
1596 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1597 	wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
1598 	wqe->flowid_len16 = cpu_to_be32(
1599 		FW_WR_FLOWID_V(qhp->ep->hwtid) |
1600 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1601 
1602 	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
1603 	wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
1604 	term = (struct terminate_message *)wqe->u.terminate.termmsg;
1605 	if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
1606 		term->layer_etype = qhp->attr.layer_etype;
1607 		term->ecode = qhp->attr.ecode;
1608 	} else
1609 		build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
1610 	c4iw_ofld_send(&qhp->rhp->rdev, skb);
1611 }
1612 
1613 /*
1614  * Assumes the caller holds the qhp mutex; the cq and qp spinlocks are taken here.
1615  */
1616 static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1617 		       struct c4iw_cq *schp)
1618 {
1619 	int count;
1620 	int rq_flushed = 0, sq_flushed;
1621 	unsigned long flag;
1622 
1623 	pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
1624 
1625 	/* locking hierarchy: cqs lock first, then qp lock. */
1626 	spin_lock_irqsave(&rchp->lock, flag);
1627 	if (schp != rchp)
1628 		spin_lock(&schp->lock);
1629 	spin_lock(&qhp->lock);
1630 
1631 	if (qhp->wq.flushed) {
1632 		spin_unlock(&qhp->lock);
1633 		if (schp != rchp)
1634 			spin_unlock(&schp->lock);
1635 		spin_unlock_irqrestore(&rchp->lock, flag);
1636 		return;
1637 	}
1638 	qhp->wq.flushed = 1;
1639 	t4_set_wq_in_error(&qhp->wq, 0);
1640 
1641 	c4iw_flush_hw_cq(rchp, qhp);
1642 	if (!qhp->srq) {
1643 		c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
1644 		rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
1645 	}
1646 
1647 	if (schp != rchp)
1648 		c4iw_flush_hw_cq(schp, qhp);
1649 	sq_flushed = c4iw_flush_sq(qhp);
1650 
1651 	spin_unlock(&qhp->lock);
1652 	if (schp != rchp)
1653 		spin_unlock(&schp->lock);
1654 	spin_unlock_irqrestore(&rchp->lock, flag);
1655 
1656 	if (schp == rchp) {
1657 		if ((rq_flushed || sq_flushed) &&
1658 		    t4_clear_cq_armed(&rchp->cq)) {
1659 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1660 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1661 						   rchp->ibcq.cq_context);
1662 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1663 		}
1664 	} else {
1665 		if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
1666 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1667 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1668 						   rchp->ibcq.cq_context);
1669 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1670 		}
1671 		if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
1672 			spin_lock_irqsave(&schp->comp_handler_lock, flag);
1673 			(*schp->ibcq.comp_handler)(&schp->ibcq,
1674 						   schp->ibcq.cq_context);
1675 			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1676 		}
1677 	}
1678 }
1679 
1680 static void flush_qp(struct c4iw_qp *qhp)
1681 {
1682 	struct c4iw_cq *rchp, *schp;
1683 	unsigned long flag;
1684 
1685 	rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1686 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
1687 
1688 	if (qhp->ibqp.uobject) {
1689 
1690 		/* for user qps, qhp->wq.flushed is protected by qhp->mutex */
1691 		if (qhp->wq.flushed)
1692 			return;
1693 
1694 		qhp->wq.flushed = 1;
1695 		t4_set_wq_in_error(&qhp->wq, 0);
1696 		t4_set_cq_in_error(&rchp->cq);
1697 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1698 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
1699 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1700 		if (schp != rchp) {
1701 			t4_set_cq_in_error(&schp->cq);
1702 			spin_lock_irqsave(&schp->comp_handler_lock, flag);
1703 			(*schp->ibcq.comp_handler)(&schp->ibcq,
1704 					schp->ibcq.cq_context);
1705 			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1706 		}
1707 		return;
1708 	}
1709 	__flush_qp(qhp, rchp, schp);
1710 }
1711 
1712 static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1713 		     struct c4iw_ep *ep)
1714 {
1715 	struct fw_ri_wr *wqe;
1716 	int ret;
1717 	struct sk_buff *skb;
1718 
1719 	pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid);
1720 
1721 	skb = skb_dequeue(&ep->com.ep_skb_list);
1722 	if (WARN_ON(!skb))
1723 		return -ENOMEM;
1724 
1725 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1726 
1727 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1728 	wqe->op_compl = cpu_to_be32(
1729 		FW_WR_OP_V(FW_RI_INIT_WR) |
1730 		FW_WR_COMPL_F);
1731 	wqe->flowid_len16 = cpu_to_be32(
1732 		FW_WR_FLOWID_V(ep->hwtid) |
1733 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1734 	wqe->cookie = (uintptr_t)ep->com.wr_waitp;
1735 
1736 	wqe->u.fini.type = FW_RI_TYPE_FINI;
1737 
1738 	ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
1739 				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1740 
1741 	pr_debug("ret %d\n", ret);
1742 	return ret;
1743 }
1744 
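/*
 * Build the dummy RTR message used for MPA peer-to-peer connection setup:
 * a 0B RDMA WRITE or a 0B READ REQ, depending on the negotiated p2p type.
 */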
1745 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
1746 {
1747 	pr_debug("p2p_type = %d\n", p2p_type);
1748 	memset(&init->u, 0, sizeof init->u);
1749 	switch (p2p_type) {
1750 	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
1751 		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
1752 		init->u.write.stag_sink = cpu_to_be32(1);
1753 		init->u.write.to_sink = cpu_to_be64(1);
1754 		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
1755 		init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
1756 						   sizeof(struct fw_ri_immd),
1757 						   16);
1758 		break;
1759 	case FW_RI_INIT_P2PTYPE_READ_REQ:
1760 		init->u.write.opcode = FW_RI_RDMA_READ_WR;
1761 		init->u.read.stag_src = cpu_to_be32(1);
1762 		init->u.read.to_src_lo = cpu_to_be32(1);
1763 		init->u.read.stag_sink = cpu_to_be32(1);
1764 		init->u.read.to_sink_lo = cpu_to_be32(1);
1765 		init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
1766 		break;
1767 	}
1768 }
1769 
1770 static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
1771 {
1772 	struct fw_ri_wr *wqe;
1773 	int ret;
1774 	struct sk_buff *skb;
1775 
1776 	pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
1777 		 qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
1778 
1779 	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
1780 	if (!skb) {
1781 		ret = -ENOMEM;
1782 		goto out;
1783 	}
1784 	ret = alloc_ird(rhp, qhp->attr.max_ird);
1785 	if (ret) {
1786 		qhp->attr.max_ird = 0;
1787 		kfree_skb(skb);
1788 		goto out;
1789 	}
1790 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1791 
1792 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1793 	wqe->op_compl = cpu_to_be32(
1794 		FW_WR_OP_V(FW_RI_INIT_WR) |
1795 		FW_WR_COMPL_F);
1796 	wqe->flowid_len16 = cpu_to_be32(
1797 		FW_WR_FLOWID_V(qhp->ep->hwtid) |
1798 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1799 
1800 	wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp;
1801 
1802 	wqe->u.init.type = FW_RI_TYPE_INIT;
1803 	wqe->u.init.mpareqbit_p2ptype =
1804 		FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) |
1805 		FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type);
1806 	wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
1807 	if (qhp->attr.mpa_attr.recv_marker_enabled)
1808 		wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
1809 	if (qhp->attr.mpa_attr.xmit_marker_enabled)
1810 		wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
1811 	if (qhp->attr.mpa_attr.crc_enabled)
1812 		wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
1813 
1814 	wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
1815 			    FW_RI_QP_RDMA_WRITE_ENABLE |
1816 			    FW_RI_QP_BIND_ENABLE;
1817 	if (!qhp->ibqp.uobject)
1818 		wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
1819 				     FW_RI_QP_STAG0_ENABLE;
1820 	wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
1821 	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
1822 	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
1823 	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
1824 	if (qhp->srq) {
1825 		wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
1826 						  qhp->srq->idx);
1827 	} else {
1828 		wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
1829 		wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
1830 		wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
1831 						   rhp->rdev.lldi.vr->rq.start);
1832 	}
1833 	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
1834 	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
1835 	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
1836 	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
1837 	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
1838 	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
1839 	if (qhp->attr.mpa_attr.initiator)
1840 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
1841 
1842 	ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
1843 				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1844 	if (!ret)
1845 		goto out;
1846 
1847 	free_ird(rhp, qhp->attr.max_ird);
1848 out:
1849 	pr_debug("ret %d\n", ret);
1850 	return ret;
1851 }
1852 
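/*
 * c4iw_modify_qp - apply attribute changes and drive the QP state machine.
 *
 * Runs under qhp->mutex.  Any terminate, disconnect or endpoint dereference
 * work is recorded in local flags and carried out only after the mutex is
 * dropped.
 */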
1853 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1854 		   enum c4iw_qp_attr_mask mask,
1855 		   struct c4iw_qp_attributes *attrs,
1856 		   int internal)
1857 {
1858 	int ret = 0;
1859 	struct c4iw_qp_attributes newattr = qhp->attr;
1860 	int disconnect = 0;
1861 	int terminate = 0;
1862 	int abort = 0;
1863 	int free = 0;
1864 	struct c4iw_ep *ep = NULL;
1865 
1866 	pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
1867 		 qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
1868 		 (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
1869 
1870 	mutex_lock(&qhp->mutex);
1871 
1872 	/* Process attr changes if in IDLE */
1873 	if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
1874 		if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
1875 			ret = -EIO;
1876 			goto out;
1877 		}
1878 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
1879 			newattr.enable_rdma_read = attrs->enable_rdma_read;
1880 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
1881 			newattr.enable_rdma_write = attrs->enable_rdma_write;
1882 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
1883 			newattr.enable_bind = attrs->enable_bind;
1884 		if (mask & C4IW_QP_ATTR_MAX_ORD) {
1885 			if (attrs->max_ord > c4iw_max_read_depth) {
1886 				ret = -EINVAL;
1887 				goto out;
1888 			}
1889 			newattr.max_ord = attrs->max_ord;
1890 		}
1891 		if (mask & C4IW_QP_ATTR_MAX_IRD) {
1892 			if (attrs->max_ird > cur_max_read_depth(rhp)) {
1893 				ret = -EINVAL;
1894 				goto out;
1895 			}
1896 			newattr.max_ird = attrs->max_ird;
1897 		}
1898 		qhp->attr = newattr;
1899 	}
1900 
1901 	if (mask & C4IW_QP_ATTR_SQ_DB) {
1902 		ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
1903 		goto out;
1904 	}
1905 	if (mask & C4IW_QP_ATTR_RQ_DB) {
1906 		ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
1907 		goto out;
1908 	}
1909 
1910 	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
1911 		goto out;
1912 	if (qhp->attr.state == attrs->next_state)
1913 		goto out;
1914 
1915 	switch (qhp->attr.state) {
1916 	case C4IW_QP_STATE_IDLE:
1917 		switch (attrs->next_state) {
1918 		case C4IW_QP_STATE_RTS:
1919 			if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
1920 				ret = -EINVAL;
1921 				goto out;
1922 			}
1923 			if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
1924 				ret = -EINVAL;
1925 				goto out;
1926 			}
1927 			qhp->attr.mpa_attr = attrs->mpa_attr;
1928 			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
1929 			qhp->ep = qhp->attr.llp_stream_handle;
1930 			set_state(qhp, C4IW_QP_STATE_RTS);
1931 
1932 			/*
1933 			 * Ref the endpoint here and deref when we
1934 			 * disassociate the endpoint from the QP.  This
1935 			 * happens in CLOSING->IDLE transition or *->ERROR
1936 			 * transition.
1937 			 */
1938 			c4iw_get_ep(&qhp->ep->com);
1939 			ret = rdma_init(rhp, qhp);
1940 			if (ret)
1941 				goto err;
1942 			break;
1943 		case C4IW_QP_STATE_ERROR:
1944 			set_state(qhp, C4IW_QP_STATE_ERROR);
1945 			flush_qp(qhp);
1946 			break;
1947 		default:
1948 			ret = -EINVAL;
1949 			goto out;
1950 		}
1951 		break;
1952 	case C4IW_QP_STATE_RTS:
1953 		switch (attrs->next_state) {
1954 		case C4IW_QP_STATE_CLOSING:
1955 			t4_set_wq_in_error(&qhp->wq, 0);
1956 			set_state(qhp, C4IW_QP_STATE_CLOSING);
1957 			ep = qhp->ep;
1958 			if (!internal) {
1959 				abort = 0;
1960 				disconnect = 1;
1961 				c4iw_get_ep(&qhp->ep->com);
1962 			}
1963 			ret = rdma_fini(rhp, qhp, ep);
1964 			if (ret)
1965 				goto err;
1966 			break;
1967 		case C4IW_QP_STATE_TERMINATE:
1968 			t4_set_wq_in_error(&qhp->wq, 0);
1969 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
1970 			qhp->attr.layer_etype = attrs->layer_etype;
1971 			qhp->attr.ecode = attrs->ecode;
1972 			ep = qhp->ep;
1973 			if (!internal) {
1974 				c4iw_get_ep(&qhp->ep->com);
1975 				terminate = 1;
1976 				disconnect = 1;
1977 			} else {
1978 				terminate = qhp->attr.send_term;
1979 				ret = rdma_fini(rhp, qhp, ep);
1980 				if (ret)
1981 					goto err;
1982 			}
1983 			break;
1984 		case C4IW_QP_STATE_ERROR:
1985 			t4_set_wq_in_error(&qhp->wq, 0);
1986 			set_state(qhp, C4IW_QP_STATE_ERROR);
1987 			if (!internal) {
1988 				abort = 1;
1989 				disconnect = 1;
1990 				ep = qhp->ep;
1991 				c4iw_get_ep(&qhp->ep->com);
1992 			}
1993 			goto err;
1994 			break;
1995 		default:
1996 			ret = -EINVAL;
1997 			goto out;
1998 		}
1999 		break;
2000 	case C4IW_QP_STATE_CLOSING:
2001 
2002 		/*
2003 		 * Allow kernel users to move to ERROR for qp draining.
2004 		 */
2005 		if (!internal && (qhp->ibqp.uobject || attrs->next_state !=
2006 				  C4IW_QP_STATE_ERROR)) {
2007 			ret = -EINVAL;
2008 			goto out;
2009 		}
2010 		switch (attrs->next_state) {
2011 		case C4IW_QP_STATE_IDLE:
2012 			flush_qp(qhp);
2013 			set_state(qhp, C4IW_QP_STATE_IDLE);
2014 			qhp->attr.llp_stream_handle = NULL;
2015 			c4iw_put_ep(&qhp->ep->com);
2016 			qhp->ep = NULL;
2017 			wake_up(&qhp->wait);
2018 			break;
2019 		case C4IW_QP_STATE_ERROR:
2020 			goto err;
2021 		default:
2022 			ret = -EINVAL;
2023 			goto err;
2024 		}
2025 		break;
2026 	case C4IW_QP_STATE_ERROR:
2027 		if (attrs->next_state != C4IW_QP_STATE_IDLE) {
2028 			ret = -EINVAL;
2029 			goto out;
2030 		}
2031 		if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
2032 			ret = -EINVAL;
2033 			goto out;
2034 		}
2035 		set_state(qhp, C4IW_QP_STATE_IDLE);
2036 		break;
2037 	case C4IW_QP_STATE_TERMINATE:
2038 		if (!internal) {
2039 			ret = -EINVAL;
2040 			goto out;
2041 		}
2042 		goto err;
2043 		break;
2044 	default:
2045 		pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
2046 		ret = -EINVAL;
2047 		goto err;
2048 		break;
2049 	}
2050 	goto out;
2051 err:
2052 	pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep,
2053 		 qhp->wq.sq.qid);
2054 
2055 	/* disassociate the LLP connection */
2056 	qhp->attr.llp_stream_handle = NULL;
2057 	if (!ep)
2058 		ep = qhp->ep;
2059 	qhp->ep = NULL;
2060 	set_state(qhp, C4IW_QP_STATE_ERROR);
2061 	free = 1;
2062 	abort = 1;
2063 	flush_qp(qhp);
2064 	wake_up(&qhp->wait);
2065 out:
2066 	mutex_unlock(&qhp->mutex);
2067 
2068 	if (terminate)
2069 		post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
2070 
2071 	/*
2072 	 * If disconnect is 1, then we need to initiate a disconnect
2073 	 * on the EP.  This can be a normal close (RTS->CLOSING) or
2074 	 * an abnormal close (RTS/CLOSING->ERROR).
2075 	 */
2076 	if (disconnect) {
2077 		c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
2078 							 GFP_KERNEL);
2079 		c4iw_put_ep(&ep->com);
2080 	}
2081 
2082 	/*
2083 	 * If free is 1, then we've disassociated the EP from the QP
2084 	 * and we need to dereference the EP.
2085 	 */
2086 	if (free)
2087 		c4iw_put_ep(&ep->com);
2088 	pr_debug("exit state %d\n", qhp->attr.state);
2089 	return ret;
2090 }
2091 
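/*
 * c4iw_destroy_qp - move the QP to ERROR, wait for the endpoint to be
 * disassociated, remove the QP from the id table and drop the reference.
 * The final teardown runs from free_qp_work() once the last reference is
 * gone.
 */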
2092 int c4iw_destroy_qp(struct ib_qp *ib_qp)
2093 {
2094 	struct c4iw_dev *rhp;
2095 	struct c4iw_qp *qhp;
2096 	struct c4iw_qp_attributes attrs;
2097 
2098 	qhp = to_c4iw_qp(ib_qp);
2099 	rhp = qhp->rhp;
2100 
2101 	attrs.next_state = C4IW_QP_STATE_ERROR;
2102 	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
2103 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2104 	else
2105 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
2106 	wait_event(qhp->wait, !qhp->ep);
2107 
2108 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
2109 
2110 	spin_lock_irq(&rhp->lock);
2111 	if (!list_empty(&qhp->db_fc_entry))
2112 		list_del_init(&qhp->db_fc_entry);
2113 	spin_unlock_irq(&rhp->lock);
2114 	free_ird(rhp, qhp->attr.max_ird);
2115 
2116 	c4iw_qp_rem_ref(ib_qp);
2117 
2118 	pr_debug("ib_qp %p qpid 0x%x\n", ib_qp, qhp->wq.sq.qid);
2119 	return 0;
2120 }
2121 
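/*
 * c4iw_create_qp - allocate and initialize an RC QP.
 *
 * Sizes the SQ (and the RQ unless an SRQ is attached), creates the hardware
 * queues, and for user QPs returns mmap keys for the queue memory and the
 * BAR2 doorbell/GTS pages (plus an MA sync key for on-chip SQs) via udata.
 */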
2122 struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
2123 			     struct ib_udata *udata)
2124 {
2125 	struct c4iw_dev *rhp;
2126 	struct c4iw_qp *qhp;
2127 	struct c4iw_pd *php;
2128 	struct c4iw_cq *schp;
2129 	struct c4iw_cq *rchp;
2130 	struct c4iw_create_qp_resp uresp;
2131 	unsigned int sqsize, rqsize = 0;
2132 	struct c4iw_ucontext *ucontext;
2133 	int ret;
2134 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
2135 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
2136 
2137 	pr_debug("ib_pd %p\n", pd);
2138 
2139 	if (attrs->qp_type != IB_QPT_RC)
2140 		return ERR_PTR(-EINVAL);
2141 
2142 	php = to_c4iw_pd(pd);
2143 	rhp = php->rhp;
2144 	schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
2145 	rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
2146 	if (!schp || !rchp)
2147 		return ERR_PTR(-EINVAL);
2148 
2149 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
2150 		return ERR_PTR(-EINVAL);
2151 
2152 	if (!attrs->srq) {
2153 		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2154 			return ERR_PTR(-E2BIG);
2155 		rqsize = attrs->cap.max_recv_wr + 1;
2156 		if (rqsize < 8)
2157 			rqsize = 8;
2158 	}
2159 
2160 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
2161 		return ERR_PTR(-E2BIG);
2162 	sqsize = attrs->cap.max_send_wr + 1;
2163 	if (sqsize < 8)
2164 		sqsize = 8;
2165 
2166 	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
2167 
2168 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
2169 	if (!qhp)
2170 		return ERR_PTR(-ENOMEM);
2171 
2172 	qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2173 	if (!qhp->wr_waitp) {
2174 		ret = -ENOMEM;
2175 		goto err_free_qhp;
2176 	}
2177 
2178 	qhp->wq.sq.size = sqsize;
2179 	qhp->wq.sq.memsize =
2180 		(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2181 		sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
2182 	qhp->wq.sq.flush_cidx = -1;
2183 	if (!attrs->srq) {
2184 		qhp->wq.rq.size = rqsize;
2185 		qhp->wq.rq.memsize =
2186 			(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2187 			sizeof(*qhp->wq.rq.queue);
2188 	}
2189 
2190 	if (ucontext) {
2191 		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
2192 		if (!attrs->srq)
2193 			qhp->wq.rq.memsize =
2194 				roundup(qhp->wq.rq.memsize, PAGE_SIZE);
2195 	}
2196 
2197 	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
2198 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2199 			qhp->wr_waitp, !attrs->srq);
2200 	if (ret)
2201 		goto err_free_wr_wait;
2202 
2203 	attrs->cap.max_recv_wr = rqsize - 1;
2204 	attrs->cap.max_send_wr = sqsize - 1;
2205 	attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
2206 
2207 	qhp->rhp = rhp;
2208 	qhp->attr.pd = php->pdid;
2209 	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
2210 	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
2211 	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
2212 	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
2213 	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
2214 	if (!attrs->srq) {
2215 		qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
2216 		qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
2217 	}
2218 	qhp->attr.state = C4IW_QP_STATE_IDLE;
2219 	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
2220 	qhp->attr.enable_rdma_read = 1;
2221 	qhp->attr.enable_rdma_write = 1;
2222 	qhp->attr.enable_bind = 1;
2223 	qhp->attr.max_ord = 0;
2224 	qhp->attr.max_ird = 0;
2225 	qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
2226 	spin_lock_init(&qhp->lock);
2227 	mutex_init(&qhp->mutex);
2228 	init_waitqueue_head(&qhp->wait);
2229 	kref_init(&qhp->kref);
2230 	INIT_WORK(&qhp->free_work, free_qp_work);
2231 
2232 	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
2233 	if (ret)
2234 		goto err_destroy_qp;
2235 
2236 	if (udata && ucontext) {
2237 		sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
2238 		if (!sq_key_mm) {
2239 			ret = -ENOMEM;
2240 			goto err_remove_handle;
2241 		}
2242 		if (!attrs->srq) {
2243 			rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
2244 			if (!rq_key_mm) {
2245 				ret = -ENOMEM;
2246 				goto err_free_sq_key;
2247 			}
2248 		}
2249 		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
2250 		if (!sq_db_key_mm) {
2251 			ret = -ENOMEM;
2252 			goto err_free_rq_key;
2253 		}
2254 		if (!attrs->srq) {
2255 			rq_db_key_mm =
2256 				kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
2257 			if (!rq_db_key_mm) {
2258 				ret = -ENOMEM;
2259 				goto err_free_sq_db_key;
2260 			}
2261 		}
2262 		memset(&uresp, 0, sizeof(uresp));
2263 		if (t4_sq_onchip(&qhp->wq.sq)) {
2264 			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
2265 						 GFP_KERNEL);
2266 			if (!ma_sync_key_mm) {
2267 				ret = -ENOMEM;
2268 				goto err_free_rq_db_key;
2269 			}
2270 			uresp.flags = C4IW_QPF_ONCHIP;
2271 		}
2272 		if (rhp->rdev.lldi.write_w_imm_support)
2273 			uresp.flags |= C4IW_QPF_WRITE_W_IMM;
2274 		uresp.qid_mask = rhp->rdev.qpmask;
2275 		uresp.sqid = qhp->wq.sq.qid;
2276 		uresp.sq_size = qhp->wq.sq.size;
2277 		uresp.sq_memsize = qhp->wq.sq.memsize;
2278 		if (!attrs->srq) {
2279 			uresp.rqid = qhp->wq.rq.qid;
2280 			uresp.rq_size = qhp->wq.rq.size;
2281 			uresp.rq_memsize = qhp->wq.rq.memsize;
2282 		}
2283 		spin_lock(&ucontext->mmap_lock);
2284 		if (ma_sync_key_mm) {
2285 			uresp.ma_sync_key = ucontext->key;
2286 			ucontext->key += PAGE_SIZE;
2287 		}
2288 		uresp.sq_key = ucontext->key;
2289 		ucontext->key += PAGE_SIZE;
2290 		if (!attrs->srq) {
2291 			uresp.rq_key = ucontext->key;
2292 			ucontext->key += PAGE_SIZE;
2293 		}
2294 		uresp.sq_db_gts_key = ucontext->key;
2295 		ucontext->key += PAGE_SIZE;
2296 		if (!attrs->srq) {
2297 			uresp.rq_db_gts_key = ucontext->key;
2298 			ucontext->key += PAGE_SIZE;
2299 		}
2300 		spin_unlock(&ucontext->mmap_lock);
2301 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2302 		if (ret)
2303 			goto err_free_ma_sync_key;
2304 		sq_key_mm->key = uresp.sq_key;
2305 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
2306 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
2307 		insert_mmap(ucontext, sq_key_mm);
2308 		if (!attrs->srq) {
2309 			rq_key_mm->key = uresp.rq_key;
2310 			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
2311 			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
2312 			insert_mmap(ucontext, rq_key_mm);
2313 		}
2314 		sq_db_key_mm->key = uresp.sq_db_gts_key;
2315 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
2316 		sq_db_key_mm->len = PAGE_SIZE;
2317 		insert_mmap(ucontext, sq_db_key_mm);
2318 		if (!attrs->srq) {
2319 			rq_db_key_mm->key = uresp.rq_db_gts_key;
2320 			rq_db_key_mm->addr =
2321 				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
2322 			rq_db_key_mm->len = PAGE_SIZE;
2323 			insert_mmap(ucontext, rq_db_key_mm);
2324 		}
2325 		if (ma_sync_key_mm) {
2326 			ma_sync_key_mm->key = uresp.ma_sync_key;
2327 			ma_sync_key_mm->addr =
2328 				(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
2329 				PCIE_MA_SYNC_A) & PAGE_MASK;
2330 			ma_sync_key_mm->len = PAGE_SIZE;
2331 			insert_mmap(ucontext, ma_sync_key_mm);
2332 		}
2333 
2334 		c4iw_get_ucontext(ucontext);
2335 		qhp->ucontext = ucontext;
2336 	}
2337 	if (!attrs->srq) {
2338 		qhp->wq.qp_errp =
2339 			&qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
2340 	} else {
2341 		qhp->wq.qp_errp =
2342 			&qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
2343 		qhp->wq.srqidxp =
2344 			&qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
2345 	}
2346 
2347 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
2348 	if (attrs->srq)
2349 		qhp->srq = to_c4iw_srq(attrs->srq);
2350 	INIT_LIST_HEAD(&qhp->db_fc_entry);
2351 	pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
2352 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
2353 		 attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
2354 		 qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
2355 	return &qhp->ibqp;
2356 err_free_ma_sync_key:
2357 	kfree(ma_sync_key_mm);
2358 err_free_rq_db_key:
2359 	if (!attrs->srq)
2360 		kfree(rq_db_key_mm);
2361 err_free_sq_db_key:
2362 	kfree(sq_db_key_mm);
2363 err_free_rq_key:
2364 	if (!attrs->srq)
2365 		kfree(rq_key_mm);
2366 err_free_sq_key:
2367 	kfree(sq_key_mm);
2368 err_remove_handle:
2369 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
2370 err_destroy_qp:
2371 	destroy_qp(&rhp->rdev, &qhp->wq,
2372 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
2373 err_free_wr_wait:
2374 	c4iw_put_wr_wait(qhp->wr_waitp);
2375 err_free_qhp:
2376 	kfree(qhp);
2377 	return ERR_PTR(ret);
2378 }
2379 
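/*
 * c4iw_ib_modify_qp - ib_qp modify verb.
 *
 * Translates the ib_qp_attr mask into c4iw attributes.  SQ_PSN/RQ_PSN are
 * overloaded to pass doorbell index increments on T4 devices (see below).
 */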
2380 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2381 		      int attr_mask, struct ib_udata *udata)
2382 {
2383 	struct c4iw_dev *rhp;
2384 	struct c4iw_qp *qhp;
2385 	enum c4iw_qp_attr_mask mask = 0;
2386 	struct c4iw_qp_attributes attrs;
2387 
2388 	pr_debug("ib_qp %p\n", ibqp);
2389 
2390 	/* iwarp does not support the RTR state */
2391 	/* iWARP does not support the RTR state */
2392 		attr_mask &= ~IB_QP_STATE;
2393 
2394 	/* Make sure we still have something left to do */
2395 	if (!attr_mask)
2396 		return 0;
2397 
2398 	memset(&attrs, 0, sizeof(attrs));
2399 	qhp = to_c4iw_qp(ibqp);
2400 	rhp = qhp->rhp;
2401 
2402 	attrs.next_state = c4iw_convert_state(attr->qp_state);
2403 	attrs.enable_rdma_read = (attr->qp_access_flags &
2404 			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
2405 	attrs.enable_rdma_write = (attr->qp_access_flags &
2406 				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2407 	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
2408 
2410 	mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
2411 	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
2412 			(C4IW_QP_ATTR_ENABLE_RDMA_READ |
2413 			 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
2414 			 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
2415 
2416 	/*
2417 	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
2418 	 * ringing the queue db when we're in DB_FULL mode.
2419 	 * Only allow this on T4 devices.
2420 	 */
2421 	attrs.sq_db_inc = attr->sq_psn;
2422 	attrs.rq_db_inc = attr->rq_psn;
2423 	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
2424 	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
2425 	if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
2426 	    (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
2427 		return -EINVAL;
2428 
2429 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
2430 }
2431 
2432 struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
2433 {
2434 	pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn);
2435 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
2436 }
2437 
2438 void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
2439 {
2440 	struct ib_event event = {};
2441 
2442 	event.device = &srq->rhp->ibdev;
2443 	event.element.srq = &srq->ibsrq;
2444 	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2445 	ib_dispatch_event(&event);
2446 }
2447 
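/*
 * c4iw_modify_srq - ib_srq modify verb.  SRQ resize is not supported;
 * arming the SRQ limit is only done for kernel consumers.
 */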
2448 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
2449 		    enum ib_srq_attr_mask srq_attr_mask,
2450 		    struct ib_udata *udata)
2451 {
2452 	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
2453 	int ret = 0;
2454 
2455 	/*
2456 	 * XXX: a zero attribute mask from user space signals SRQ limit reached.
2457 	 */
2458 	if (udata && !srq_attr_mask) {
2459 		c4iw_dispatch_srq_limit_reached_event(srq);
2460 		goto out;
2461 	}
2462 
2463 	/* Resizing the SRQ (IB_SRQ_MAX_WR) is not supported yet. */
2464 	if (srq_attr_mask & IB_SRQ_MAX_WR) {
2465 		ret = -EINVAL;
2466 		goto out;
2467 	}
2468 
2469 	if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
2470 		srq->armed = true;
2471 		srq->srq_limit = attr->srq_limit;
2472 	}
2473 out:
2474 	return ret;
2475 }
2476 
2477 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2478 		     int attr_mask, struct ib_qp_init_attr *init_attr)
2479 {
2480 	struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
2481 
2482 	memset(attr, 0, sizeof(*attr));
2483 	memset(init_attr, 0, sizeof(*init_attr));
2484 	attr->qp_state = to_ib_qp_state(qhp->attr.state);
2485 	init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
2486 	init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
2487 	init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
2488 	init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
2489 	init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
2490 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
2491 	return 0;
2492 }
2493 
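/*
 * free_srq_queue - post a FW_RI_RES_WR RESET for the SRQ on the
 * pre-allocated destroy skb, wait for the firmware, then free the DMA
 * queue memory, RQT entries, software RQ and qid.
 */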
2494 static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2495 			   struct c4iw_wr_wait *wr_waitp)
2496 {
2497 	struct c4iw_rdev *rdev = &srq->rhp->rdev;
2498 	struct sk_buff *skb = srq->destroy_skb;
2499 	struct t4_srq *wq = &srq->wq;
2500 	struct fw_ri_res_wr *res_wr;
2501 	struct fw_ri_res *res;
2502 	int wr_len;
2503 
2504 	wr_len = sizeof(*res_wr) + sizeof(*res);
2505 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2506 
2507 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2508 	memset(res_wr, 0, wr_len);
2509 	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2510 			FW_RI_RES_WR_NRES_V(1) |
2511 			FW_WR_COMPL_F);
2512 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2513 	res_wr->cookie = (uintptr_t)wr_waitp;
2514 	res = res_wr->res;
2515 	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2516 	res->u.srq.op = FW_RI_RES_OP_RESET;
2517 	res->u.srq.srqid = cpu_to_be32(srq->idx);
2518 	res->u.srq.eqid = cpu_to_be32(wq->qid);
2519 
2520 	c4iw_init_wr_wait(wr_waitp);
2521 	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
2522 
2523 	dma_free_coherent(&rdev->lldi.pdev->dev,
2524 			  wq->memsize, wq->queue,
2525 			dma_unmap_addr(wq, mapping));
2526 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2527 	kfree(wq->sw_rq);
2528 	c4iw_put_qpid(rdev, wq->qid, uctx);
2529 }
2530 
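/*
 * alloc_srq_queue - allocate the hardware SRQ: a qid, software tracking
 * arrays (kernel SRQs only), RQT entries and DMA-coherent queue memory,
 * plus the BAR2 doorbell mapping, then post a FW_RI_RES_WR WRITE so the
 * firmware creates the queue.
 */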
2531 static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2532 			   struct c4iw_wr_wait *wr_waitp)
2533 {
2534 	struct c4iw_rdev *rdev = &srq->rhp->rdev;
2535 	int user = (uctx != &rdev->uctx);
2536 	struct t4_srq *wq = &srq->wq;
2537 	struct fw_ri_res_wr *res_wr;
2538 	struct fw_ri_res *res;
2539 	struct sk_buff *skb;
2540 	int wr_len;
2541 	int eqsize;
2542 	int ret = -ENOMEM;
2543 
2544 	wq->qid = c4iw_get_qpid(rdev, uctx);
2545 	if (!wq->qid)
2546 		goto err;
2547 
2548 	if (!user) {
2549 		wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
2550 				    GFP_KERNEL);
2551 		if (!wq->sw_rq)
2552 			goto err_put_qpid;
2553 		wq->pending_wrs = kcalloc(srq->wq.size,
2554 					  sizeof(*srq->wq.pending_wrs),
2555 					  GFP_KERNEL);
2556 		if (!wq->pending_wrs)
2557 			goto err_free_sw_rq;
2558 	}
2559 
2560 	wq->rqt_size = wq->size;
2561 	wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
2562 	if (!wq->rqt_hwaddr)
2563 		goto err_free_pending_wrs;
2564 	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
2565 		T4_RQT_ENTRY_SHIFT;
2566 
2567 	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
2568 				       wq->memsize, &wq->dma_addr,
2569 			GFP_KERNEL);
2570 	if (!wq->queue)
2571 		goto err_free_rqtpool;
2572 
2573 	memset(wq->queue, 0, wq->memsize);
2574 	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
2575 
2576 	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
2577 				      &wq->bar2_qid,
2578 			user ? &wq->bar2_pa : NULL);
2579 
2580 	/* User mode must have BAR2 doorbell access. */
2584 	if (user && !wq->bar2_va) {
2585 		pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
2586 			pci_name(rdev->lldi.pdev), wq->qid);
2587 		ret = -EINVAL;
2588 		goto err_free_queue;
2589 	}
2590 
2591 	/* build fw_ri_res_wr */
2592 	wr_len = sizeof(*res_wr) + sizeof(*res);
2593 
2594 	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
2595 	if (!skb)
2596 		goto err_free_queue;
2597 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2598 
2599 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2600 	memset(res_wr, 0, wr_len);
2601 	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2602 			FW_RI_RES_WR_NRES_V(1) |
2603 			FW_WR_COMPL_F);
2604 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2605 	res_wr->cookie = (uintptr_t)wr_waitp;
2606 	res = res_wr->res;
2607 	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2608 	res->u.srq.op = FW_RI_RES_OP_WRITE;
2609 
2610 	/*
2611 	 * eqsize is the number of 64B entries plus the status page size.
2612 	 */
2613 	eqsize = wq->size * T4_RQ_NUM_SLOTS +
2614 		rdev->hw_queue.t4_eq_status_entries;
2615 	res->u.srq.eqid = cpu_to_be32(wq->qid);
2616 	res->u.srq.fetchszm_to_iqid =
2617 						/* no host cidx updates */
2618 		cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
2619 		FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
2620 		FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
2621 		FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
2622 	res->u.srq.dcaen_to_eqsize =
2623 		cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
2624 		FW_RI_RES_WR_DCACPU_V(0) |
2625 		FW_RI_RES_WR_FBMIN_V(2) |
2626 		FW_RI_RES_WR_FBMAX_V(3) |
2627 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
2628 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
2629 		FW_RI_RES_WR_EQSIZE_V(eqsize));
2630 	res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
2631 	res->u.srq.srqid = cpu_to_be32(srq->idx);
2632 	res->u.srq.pdid = cpu_to_be32(srq->pdid);
2633 	res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
2634 	res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
2635 			rdev->lldi.vr->rq.start);
2636 
2637 	c4iw_init_wr_wait(wr_waitp);
2638 
2639 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
2640 	if (ret)
2641 		goto err_free_queue;
2642 
2643 	pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx bar2_addr %p rqt addr 0x%x size %d\n",
2645 			__func__, srq->idx, wq->qid, srq->pdid, wq->queue,
2646 			(u64)virt_to_phys(wq->queue), wq->bar2_va,
2647 			wq->rqt_hwaddr, wq->rqt_size);
2648 
2649 	return 0;
2650 err_free_queue:
2651 	dma_free_coherent(&rdev->lldi.pdev->dev,
2652 			  wq->memsize, wq->queue,
2653 			dma_unmap_addr(wq, mapping));
2654 err_free_rqtpool:
2655 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2656 err_free_pending_wrs:
2657 	if (!user)
2658 		kfree(wq->pending_wrs);
2659 err_free_sw_rq:
2660 	if (!user)
2661 		kfree(wq->sw_rq);
2662 err_put_qpid:
2663 	c4iw_put_qpid(rdev, wq->qid, uctx);
2664 err:
2665 	return ret;
2666 }
2667 
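/*
 * c4iw_copy_wr_to_srq - copy a receive WR into the SRQ ring, 16 bytes
 * (two u64s) per len16 unit, wrapping at the end of the queue.
 */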
2668 void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
2669 {
2670 	u64 *src, *dst;
2671 
2672 	src = (u64 *)wqe;
2673 	dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
2674 	while (len16) {
2675 		*dst++ = *src++;
2676 		if (dst >= (u64 *)&srq->queue[srq->size])
2677 			dst = (u64 *)srq->queue;
2678 		*dst++ = *src++;
2679 		if (dst >= (u64 *)&srq->queue[srq->size])
2680 			dst = (u64 *)srq->queue;
2681 		len16--;
2682 	}
2683 }
2684 
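/*
 * c4iw_create_srq - allocate and initialize an SRQ.
 *
 * Rounds the requested depth up to a power of two (minimum 16), allocates
 * the hardware queue, and for user SRQs returns mmap keys for the queue
 * memory and the BAR2 doorbell page via udata.
 */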
2685 struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
2686 			       struct ib_udata *udata)
2687 {
2688 	struct c4iw_dev *rhp;
2689 	struct c4iw_srq *srq;
2690 	struct c4iw_pd *php;
2691 	struct c4iw_create_srq_resp uresp;
2692 	struct c4iw_ucontext *ucontext;
2693 	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
2694 	int rqsize;
2695 	int ret;
2696 	int wr_len;
2697 
2698 	pr_debug("%s ib_pd %p\n", __func__, pd);
2699 
2700 	php = to_c4iw_pd(pd);
2701 	rhp = php->rhp;
2702 
2703 	if (!rhp->rdev.lldi.vr->srq.size)
2704 		return ERR_PTR(-EINVAL);
2705 	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2706 		return ERR_PTR(-E2BIG);
2707 	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
2708 		return ERR_PTR(-E2BIG);
2709 
2710 	/*
2711 	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
2712 	 */
2713 	rqsize = attrs->attr.max_wr + 1;
2714 	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
2715 
2716 	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
2717 
2718 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
2719 	if (!srq)
2720 		return ERR_PTR(-ENOMEM);
2721 
2722 	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2723 	if (!srq->wr_waitp) {
2724 		ret = -ENOMEM;
2725 		goto err_free_srq;
2726 	}
2727 
2728 	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
2729 	if (srq->idx < 0) {
2730 		ret = -ENOMEM;
2731 		goto err_free_wr_wait;
2732 	}
2733 
2734 	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
2735 	srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
2736 	if (!srq->destroy_skb) {
2737 		ret = -ENOMEM;
2738 		goto err_free_srq_idx;
2739 	}
2740 
2741 	srq->rhp = rhp;
2742 	srq->pdid = php->pdid;
2743 
2744 	srq->wq.size = rqsize;
2745 	srq->wq.memsize =
2746 		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2747 		sizeof(*srq->wq.queue);
2748 	if (ucontext)
2749 		srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
2750 
2751 	ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
2752 			&rhp->rdev.uctx, srq->wr_waitp);
2753 	if (ret)
2754 		goto err_free_skb;
2755 	attrs->attr.max_wr = rqsize - 1;
2756 
2757 	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
2758 		srq->flags = T4_SRQ_LIMIT_SUPPORT;
2759 
2760 	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
2761 	if (ret)
2762 		goto err_free_queue;
2763 
2764 	if (udata) {
2765 		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
2766 		if (!srq_key_mm) {
2767 			ret = -ENOMEM;
2768 			goto err_remove_handle;
2769 		}
2770 		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
2771 		if (!srq_db_key_mm) {
2772 			ret = -ENOMEM;
2773 			goto err_free_srq_key_mm;
2774 		}
2775 		memset(&uresp, 0, sizeof(uresp));
2776 		uresp.flags = srq->flags;
2777 		uresp.qid_mask = rhp->rdev.qpmask;
2778 		uresp.srqid = srq->wq.qid;
2779 		uresp.srq_size = srq->wq.size;
2780 		uresp.srq_memsize = srq->wq.memsize;
2781 		uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
2782 		spin_lock(&ucontext->mmap_lock);
2783 		uresp.srq_key = ucontext->key;
2784 		ucontext->key += PAGE_SIZE;
2785 		uresp.srq_db_gts_key = ucontext->key;
2786 		ucontext->key += PAGE_SIZE;
2787 		spin_unlock(&ucontext->mmap_lock);
2788 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2789 		if (ret)
2790 			goto err_free_srq_db_key_mm;
2791 		srq_key_mm->key = uresp.srq_key;
2792 		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
2793 		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
2794 		insert_mmap(ucontext, srq_key_mm);
2795 		srq_db_key_mm->key = uresp.srq_db_gts_key;
2796 		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
2797 		srq_db_key_mm->len = PAGE_SIZE;
2798 		insert_mmap(ucontext, srq_db_key_mm);
2799 	}
2800 
2801 	pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
2802 		 __func__, srq->wq.qid, srq->idx, srq->wq.size,
2803 			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
2804 
2805 	spin_lock_init(&srq->lock);
2806 	return &srq->ibsrq;
2807 err_free_srq_db_key_mm:
2808 	kfree(srq_db_key_mm);
2809 err_free_srq_key_mm:
2810 	kfree(srq_key_mm);
2811 err_remove_handle:
2812 	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2813 err_free_queue:
2814 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2815 		       srq->wr_waitp);
2816 err_free_skb:
2817 	kfree_skb(srq->destroy_skb);
2818 err_free_srq_idx:
2819 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2820 err_free_wr_wait:
2821 	c4iw_put_wr_wait(srq->wr_waitp);
2822 err_free_srq:
2823 	kfree(srq);
2824 	return ERR_PTR(ret);
2825 }
2826 
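/*
 * c4iw_destroy_srq - remove the SRQ from the id table, reset and free the
 * hardware queue, and release the SRQ index and wait object.
 */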
2827 int c4iw_destroy_srq(struct ib_srq *ibsrq)
2828 {
2829 	struct c4iw_dev *rhp;
2830 	struct c4iw_srq *srq;
2831 	struct c4iw_ucontext *ucontext;
2832 
2833 	srq = to_c4iw_srq(ibsrq);
2834 	rhp = srq->rhp;
2835 
2836 	pr_debug("%s id %u\n", __func__, srq->wq.qid);
2837 
2838 	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2839 	ucontext = ibsrq->uobject ?
2840 		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
2841 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2842 		       srq->wr_waitp);
2843 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2844 	c4iw_put_wr_wait(srq->wr_waitp);
2845 	kfree(srq);
2846 	return 0;
2847 }
2848