/*
 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/spinlock.h>
#include <rdma/ib_smi.h>

#include "qib.h"
#include "qib_mad.h"

/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
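	/*
	 * Build the SGE state, skipping zero-length entries; j counts the
	 * SGEs actually mapped, so ss->num_sge may end up smaller than
	 * wqe->num_sge.
	 */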
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				  NULL, &wqe->sg_list[i],
				  IB_ACCESS_LOCAL_WRITE);
		if (unlikely(ret <= 0))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * qib_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct rvt_rq *rq;
	struct rvt_rwq *wq;
	struct rvt_srq *srq;
	struct rvt_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (qp->ibqp.srq) {
		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	if (unlikely(tail == wq->head)) {
		ret = 0;
		goto unlock;
	}
	/* Make sure entry is read after head index is read. */
	smp_rmb();
	wqe = rvt_get_rwqe_ptr(rq, tail);
	/*
	 * Even though we update the tail index in memory, the verbs
	 * consumer is not supposed to post more entries until a
	 * completion is generated.
	 */
	if (++tail >= rq->size)
		tail = 0;
	wq->tail = tail;
	if (!wr_id_only && !qib_init_sge(qp, wqe)) {
		ret = -1;
		goto unlock;
	}
	qp->r_wr_id = wqe->wr_id;

	ret = 1;
	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * Validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void qib_migrate_qp(struct rvt_qp *qp)
{
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
	qp->s_pkey_index = qp->s_alt_pkey_index;

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
{
	if (!index) {
		struct qib_pportdata *ppd = ppd_from_ibp(ibp);

		return ppd->guid;
	}
	return ibp->guids[index - 1];
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 * qib_ruc_check_hdr - validate an incoming packet's header against the QP
 *
 * Return 0 if the header is consistent with the QP's current (or, for a
 * migration request, alternate) path, or 1 on any mismatch.
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the qib_migrate_qp() call.
 */
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
		      int has_grh, struct rvt_qp *qp, u32 bth0)
{
	__be64 guid;
	unsigned long flags;

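	/*
	 * A migration request arriving on an armed QP is checked against the
	 * alternate path (and triggers qib_migrate_qp() on success); all
	 * other packets are checked against the primary path.
	 */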
	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
		if (!has_grh) {
			if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			    IB_AH_GRH)
				goto err;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			      IB_AH_GRH))
				goto err;
			grh = rdma_ah_read_grh(&qp->alt_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid,
				    ibp->rvp.gid_prefix, guid))
				goto err;
			if (!gid_ok(&hdr->u.l.grh.sgid,
			    grh->dgid.global.subnet_prefix,
			    grh->dgid.global.interface_id))
				goto err;
		}
		if (!qib_pkey_ok((u16)bth0,
				 qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
			qib_bad_pkey(ibp,
				     (u16)bth0,
				     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				     0, qp->ibqp.qp_num,
				     hdr->lrh[3], hdr->lrh[1]);
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if ((be16_to_cpu(hdr->lrh[3]) !=
		     rdma_ah_get_dlid(&qp->alt_ah_attr)) ||
		    ppd_from_ibp(ibp)->port !=
			    rdma_ah_get_port_num(&qp->alt_ah_attr))
			goto err;
		spin_lock_irqsave(&qp->s_lock, flags);
		qib_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!has_grh) {
			if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			    IB_AH_GRH)
				goto err;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			      IB_AH_GRH))
				goto err;
			grh = rdma_ah_read_grh(&qp->remote_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid,
				    ibp->rvp.gid_prefix, guid))
				goto err;
			if (!gid_ok(&hdr->u.l.grh.sgid,
			    grh->dgid.global.subnet_prefix,
			    grh->dgid.global.interface_id))
				goto err;
		}
		if (!qib_pkey_ok((u16)bth0,
				 qib_get_pkey(ibp, qp->s_pkey_index))) {
			qib_bad_pkey(ibp,
				     (u16)bth0,
				     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				     0, qp->ibqp.qp_num,
				     hdr->lrh[3], hdr->lrh[1]);
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if (be16_to_cpu(hdr->lrh[3]) !=
		    rdma_ah_get_dlid(&qp->remote_ah_attr) ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			goto err;
		if (qp->s_mig_state == IB_MIG_REARM &&
		    !(bth0 & IB_BTH_MIG_REQ))
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;

err:
	return 1;
}

/**
 * qib_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from qib_do_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void qib_ruc_loopback(struct rvt_qp *sqp)
{
	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	struct qib_devdata *dd = ppd->dd;
	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
	struct rvt_qp *qp;
	struct rvt_swqe *wqe;
	struct rvt_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;
	int release;
	int ret;

	rcu_read_lock();
	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
	if (!qp)
		goto done;

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= RVT_S_BUSY;

again:
	smp_read_barrier_depends(); /* see post_one_send() */
	if (sqp->s_last == READ_ONCE(sqp->s_head))
		goto clr_busy;
	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
		ibp->rvp.n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof(wc));
	send_status = IB_WC_SUCCESS;

	release = 1;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
		ret = qib_get_rwqe(qp, 0);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		ret = qib_get_rwqe(qp, 1);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		/* FALLTHROUGH */
	case IB_WR_RDMA_WRITE:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		if (wqe->length == 0)
			break;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		qp->r_sge.sg_list = NULL;
		qp->r_sge.num_sge = 1;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		release = 0;
		sqp->s_sge.sg_list = NULL;
		sqp->s_sge.num_sge = 1;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
					  wqe->atomic_wr.remote_addr,
					  wqe->atomic_wr.rkey,
					  IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
		sdata = wqe->atomic_wr.compare_add;
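		/*
		 * The prior value at the responder's address is returned in
		 * the requester's local buffer for both FETCH_ADD and
		 * CMP_SWP.
		 */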
		*(u64 *) sqp->s_sge.sge.vaddr =
			(wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64) atomic64_add_return(sdata, maddr) - sdata :
			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
				      sdata, wqe->atomic_wr.swap);
		rvt_put_mr(qp->r_sge.sge.mr);
		qp->r_sge.num_sge = 0;
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

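	/*
	 * Copy the payload from the sender's SGE list directly into the
	 * receiver's SGE state, advancing both sides as each segment is
	 * consumed.
	 */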
	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (!release)
				rvt_put_mr(sge->mr);
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}
	if (release)
		rvt_put_ss(&qp->r_sge);

	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ibp->rvp.n_loop_pkts++;
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	qib_send_complete(sqp, wqe, send_status);
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	ibp->rvp.n_rnr_naks++;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
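	/*
	 * An RNR retry count of 7 means retry forever, so only decrement
	 * finite counts.
	 */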
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
		goto clr_busy;
	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
				IB_AETH_CREDIT_SHIFT);
	goto clr_busy;

op_err:
	send_status = IB_WC_REM_OP_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	rvt_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	qib_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~RVT_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	rcu_read_unlock();
}

/**
 * qib_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
		 const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	if (!grh->sgid_index)
		hdr->sgid.global.interface_id = ppd_from_ibp(ibp)->guid;
	else if (grh->sgid_index < QIB_GUIDS_PER_PORT)
		hdr->sgid.global.interface_id = ibp->guids[grh->sgid_index - 1];
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}

void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			 u32 bth0, u32 bth2)
{
	struct qib_qp_priv *priv = qp->priv;
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;

	/* Construct the header. */
	extra_bytes = -qp->s_cur_size & 3;
	nwords = (qp->s_cur_size + extra_bytes) >> 2;
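	/*
	 * extra_bytes pads the payload to a 4-byte boundary; nwords is the
	 * padded payload length in 32-bit words.
	 */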
	lrh0 = QIB_LRH_BTH;
	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		qp->s_hdrwords +=
			qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
				     rdma_ah_read_grh(&qp->remote_ah_attr),
				     qp->s_hdrwords, nwords);
		lrh0 = QIB_LRH_GRH;
	}
	lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
		rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
	priv->s_hdr->lrh[1] =
			cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
	priv->s_hdr->lrh[2] =
			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	priv->s_hdr->lrh[3] =
		cpu_to_be16(ppd_from_ibp(ibp)->lid |
			    rdma_ah_get_path_bits(&qp->remote_ah_attr));
	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);
	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}

void _qib_do_send(struct work_struct *work)
{
	struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
						s_work);
	struct rvt_qp *qp = priv->owner;

	qib_do_send(qp);
}

/**
 * qib_do_send - perform a send on a QP
 * @qp: pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, two threads could send packets out of order.
 */
void qib_do_send(struct rvt_qp *qp)
{
	struct qib_qp_priv *priv = qp->priv;
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
	unsigned long flags;

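	/*
	 * For RC and UC QPs whose destination LID, with the LMC path bits
	 * masked off, is this port's own LID, the request never touches the
	 * wire and is handled entirely in software by qib_ruc_loopback().
	 */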
	if ((qp->ibqp.qp_type == IB_QPT_RC ||
	     qp->ibqp.qp_type == IB_QPT_UC) &&
	    (rdma_ah_get_dlid(&qp->remote_ah_attr) &
	     ~((1 << ppd->lmc) - 1)) == ppd->lid) {
		qib_ruc_loopback(qp);
		return;
	}

	if (qp->ibqp.qp_type == IB_QPT_RC)
		make_req = qib_make_rc_req;
	else if (qp->ibqp.qp_type == IB_QPT_UC)
		make_req = qib_make_uc_req;
	else
		make_req = qib_make_ud_req;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if (!qib_send_ok(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	do {
		/* Check for a constructed packet to be sent. */
		if (qp->s_hdrwords != 0) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send tasklet will be woken up later.
			 */
			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
					   qp->s_cur_sge, qp->s_cur_size))
				return;
			/* Record that s_hdr is empty. */
			qp->s_hdrwords = 0;
			spin_lock_irqsave(&qp->s_lock, flags);
		}
	} while (make_req(qp, &flags));

	spin_unlock_irqrestore(&qp->s_lock, flags);
}

/*
 * This should be called with s_lock held.
 */
void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
		       enum ib_wc_status status)
{
	u32 old_last, last;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		return;

	last = qp->s_last;
	old_last = last;
	if (++last >= qp->s_size)
		last = 0;
	qp->s_last = last;
	/* See post_send() */
	barrier();
	rvt_put_swqe(wqe);
	if (qp->ibqp.qp_type == IB_QPT_UD ||
	    qp->ibqp.qp_type == IB_QPT_SMI ||
	    qp->ibqp.qp_type == IB_QPT_GSI)
		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

	rvt_qp_swqe_complete(qp,
			     wqe,
			     ib_qib_wc_opcode[wqe->wr.opcode],
			     status);

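	/*
	 * Advance any send-side indices that still reference the just
	 * completed slot so they do not point at a retired WQE.
	 */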
	if (qp->s_acked == old_last)
		qp->s_acked = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}