xref: /openbmc/linux/drivers/infiniband/hw/qib/qib_rc.c (revision 82003e04)
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/io.h>
35 
36 #include "qib.h"
37 
38 /* cut down ridiculously long IB macro names */
39 #define OP(x) IB_OPCODE_RC_##x
40 
41 static void rc_timeout(unsigned long arg);
42 
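/*
 * Reposition a work request's SGE state at the packet with the given
 * PSN and return how many bytes are left to (re)send.  For example
 * (illustrative numbers only): restarting three packets into a request
 * with a 4096-byte path MTU skips 3 * 4096 = 12288 bytes of the SGE
 * list.
 */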
43 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
44 		       u32 psn, u32 pmtu)
45 {
46 	u32 len;
47 
48 	len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
49 	ss->sge = wqe->sg_list[0];
50 	ss->sg_list = wqe->sg_list + 1;
51 	ss->num_sge = wqe->wr.num_sge;
52 	ss->total_len = wqe->length;
53 	qib_skip_sge(ss, len, 0);
54 	return wqe->length - len;
55 }
56 
57 static void start_timer(struct rvt_qp *qp)
58 {
59 	qp->s_flags |= RVT_S_TIMER;
60 	qp->s_timer.function = rc_timeout;
61 	/* 4.096 usec. * (1 << qp->timeout) */
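	/* e.g. a timeout exponent of 14 gives 4.096 usec * 16384 ~= 67 msec */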
62 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
63 	add_timer(&qp->s_timer);
64 }
65 
66 /**
67  * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
68  * @dev: the device for this QP
69  * @qp: a pointer to the QP
70  * @ohdr: a pointer to the IB header being constructed
71  * @pmtu: the path MTU
72  *
73  * Return 1 if constructed; otherwise, return 0.
74  * Note that we are on the responder side of the QP context.
75  * Note the QP s_lock must be held.
76  */
77 static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
78 			   struct ib_other_headers *ohdr, u32 pmtu)
79 {
80 	struct rvt_ack_entry *e;
81 	u32 hwords;
82 	u32 len;
83 	u32 bth0;
84 	u32 bth2;
85 
86 	/* Don't send an ACK if we aren't supposed to. */
87 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
88 		goto bail;
89 
90 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
91 	hwords = 5;
92 
93 	switch (qp->s_ack_state) {
94 	case OP(RDMA_READ_RESPONSE_LAST):
95 	case OP(RDMA_READ_RESPONSE_ONLY):
96 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
97 		if (e->rdma_sge.mr) {
98 			rvt_put_mr(e->rdma_sge.mr);
99 			e->rdma_sge.mr = NULL;
100 		}
101 		/* FALLTHROUGH */
102 	case OP(ATOMIC_ACKNOWLEDGE):
103 		/*
104 		 * We can increment the tail pointer now that the last
105 		 * response has been sent instead of only being
106 		 * constructed.
107 		 */
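		/*
		 * The ack queue ring holds QIB_MAX_RDMA_ATOMIC + 1 entries,
		 * so the index wraps once it passes QIB_MAX_RDMA_ATOMIC
		 * (see also qib_update_ack_queue()).
		 */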
108 		if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
109 			qp->s_tail_ack_queue = 0;
110 		/* FALLTHROUGH */
111 	case OP(SEND_ONLY):
112 	case OP(ACKNOWLEDGE):
113 		/* Check for no next entry in the queue. */
114 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
115 			if (qp->s_flags & RVT_S_ACK_PENDING)
116 				goto normal;
117 			goto bail;
118 		}
119 
120 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
121 		if (e->opcode == OP(RDMA_READ_REQUEST)) {
122 			/*
123 			 * If a RDMA read response is being resent and
124 			 * we haven't seen the duplicate request yet,
125 			 * then stop sending the remaining responses the
126 			 * responder has seen until the requester resends it.
127 			 */
128 			len = e->rdma_sge.sge_length;
129 			if (len && !e->rdma_sge.mr) {
130 				qp->s_tail_ack_queue = qp->r_head_ack_queue;
131 				goto bail;
132 			}
133 			/* Copy SGE state in case we need to resend */
134 			qp->s_rdma_mr = e->rdma_sge.mr;
135 			if (qp->s_rdma_mr)
136 				rvt_get_mr(qp->s_rdma_mr);
137 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
138 			qp->s_ack_rdma_sge.num_sge = 1;
139 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
140 			if (len > pmtu) {
141 				len = pmtu;
142 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
143 			} else {
144 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
145 				e->sent = 1;
146 			}
147 			ohdr->u.aeth = qib_compute_aeth(qp);
148 			hwords++;
149 			qp->s_ack_rdma_psn = e->psn;
150 			bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
151 		} else {
152 			/* COMPARE_SWAP or FETCH_ADD */
153 			qp->s_cur_sge = NULL;
154 			len = 0;
155 			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
156 			ohdr->u.at.aeth = qib_compute_aeth(qp);
157 			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
158 			hwords += sizeof(ohdr->u.at) / sizeof(u32);
159 			bth2 = e->psn & QIB_PSN_MASK;
160 			e->sent = 1;
161 		}
162 		bth0 = qp->s_ack_state << 24;
163 		break;
164 
165 	case OP(RDMA_READ_RESPONSE_FIRST):
166 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
167 		/* FALLTHROUGH */
168 	case OP(RDMA_READ_RESPONSE_MIDDLE):
169 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
170 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
171 		if (qp->s_rdma_mr)
172 			rvt_get_mr(qp->s_rdma_mr);
173 		len = qp->s_ack_rdma_sge.sge.sge_length;
174 		if (len > pmtu)
175 			len = pmtu;
176 		else {
177 			ohdr->u.aeth = qib_compute_aeth(qp);
178 			hwords++;
179 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
180 			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
181 			e->sent = 1;
182 		}
183 		bth0 = qp->s_ack_state << 24;
184 		bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
185 		break;
186 
187 	default:
188 normal:
189 		/*
190 		 * Send a regular ACK.
191 		 * Set the s_ack_state so we wait until after sending
192 		 * the ACK before setting s_ack_state to ACKNOWLEDGE
193 		 * (see above).
194 		 */
195 		qp->s_ack_state = OP(SEND_ONLY);
196 		qp->s_flags &= ~RVT_S_ACK_PENDING;
197 		qp->s_cur_sge = NULL;
198 		if (qp->s_nak_state)
199 			ohdr->u.aeth =
200 				cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
201 					    (qp->s_nak_state <<
202 					     QIB_AETH_CREDIT_SHIFT));
203 		else
204 			ohdr->u.aeth = qib_compute_aeth(qp);
205 		hwords++;
206 		len = 0;
207 		bth0 = OP(ACKNOWLEDGE) << 24;
208 		bth2 = qp->s_ack_psn & QIB_PSN_MASK;
209 	}
210 	qp->s_rdma_ack_cnt++;
211 	qp->s_hdrwords = hwords;
212 	qp->s_cur_size = len;
213 	qib_make_ruc_header(qp, ohdr, bth0, bth2);
214 	return 1;
215 
216 bail:
217 	qp->s_ack_state = OP(ACKNOWLEDGE);
218 	qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
219 	return 0;
220 }
221 
222 /**
223  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
224  * @qp: a pointer to the QP
225  *
226  * Assumes the s_lock is held.
227  *
228  * Return 1 if constructed; otherwise, return 0.
229  */
230 int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
231 {
232 	struct qib_qp_priv *priv = qp->priv;
233 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
234 	struct ib_other_headers *ohdr;
235 	struct rvt_sge_state *ss;
236 	struct rvt_swqe *wqe;
237 	u32 hwords;
238 	u32 len;
239 	u32 bth0;
240 	u32 bth2;
241 	u32 pmtu = qp->pmtu;
242 	char newreq;
243 	int ret = 0;
244 	int delta;
245 
246 	ohdr = &priv->s_hdr->u.oth;
247 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
248 		ohdr = &priv->s_hdr->u.l.oth;
249 
250 	/* Sending responses has higher priority than sending requests. */
251 	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
252 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
253 		goto done;
254 
255 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
256 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
257 			goto bail;
258 		/* We are in the error state, flush the work request. */
259 		smp_read_barrier_depends(); /* see post_one_send() */
260 		if (qp->s_last == ACCESS_ONCE(qp->s_head))
261 			goto bail;
262 		/* If DMAs are in progress, we can't flush immediately. */
263 		if (atomic_read(&priv->s_dma_busy)) {
264 			qp->s_flags |= RVT_S_WAIT_DMA;
265 			goto bail;
266 		}
267 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
268 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
269 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
270 		/* will get called again */
271 		goto done;
272 	}
273 
274 	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
275 		goto bail;
276 
277 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
278 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
279 			qp->s_flags |= RVT_S_WAIT_PSN;
280 			goto bail;
281 		}
282 		qp->s_sending_psn = qp->s_psn;
283 		qp->s_sending_hpsn = qp->s_psn - 1;
284 	}
285 
286 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
287 	hwords = 5;
288 	bth0 = 0;
289 
290 	/* Send a request. */
291 	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
292 	switch (qp->s_state) {
293 	default:
294 		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
295 			goto bail;
296 		/*
297 		 * Resend an old request or start a new one.
298 		 *
299 		 * We keep track of the current SWQE so that
300 		 * we don't reset the "furthest progress" state
301 		 * if we need to back up.
302 		 */
303 		newreq = 0;
304 		if (qp->s_cur == qp->s_tail) {
305 			/* Check if send work queue is empty. */
306 			if (qp->s_tail == qp->s_head)
307 				goto bail;
308 			/*
309 			 * If a fence is requested, wait for previous
310 			 * RDMA read and atomic operations to finish.
311 			 */
312 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
313 			    qp->s_num_rd_atomic) {
314 				qp->s_flags |= RVT_S_WAIT_FENCE;
315 				goto bail;
316 			}
317 			newreq = 1;
318 			qp->s_psn = wqe->psn;
319 		}
320 		/*
321 		 * Note that we have to be careful not to modify the
322 		 * original work request since we may need to resend
323 		 * it.
324 		 */
325 		len = wqe->length;
326 		ss = &qp->s_sge;
327 		bth2 = qp->s_psn & QIB_PSN_MASK;
328 		switch (wqe->wr.opcode) {
329 		case IB_WR_SEND:
330 		case IB_WR_SEND_WITH_IMM:
331 			/* If no credit, return. */
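			/*
			 * Note: s_lsn is advanced by qib_get_credit() from
			 * the AETH credit field, so the send waits here
			 * (RVT_S_WAIT_SSN_CREDIT) until the peer grants
			 * more credits.
			 */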
332 			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
333 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
334 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
335 				goto bail;
336 			}
337 			if (len > pmtu) {
338 				qp->s_state = OP(SEND_FIRST);
339 				len = pmtu;
340 				break;
341 			}
342 			if (wqe->wr.opcode == IB_WR_SEND)
343 				qp->s_state = OP(SEND_ONLY);
344 			else {
345 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
346 				/* Immediate data comes after the BTH */
347 				ohdr->u.imm_data = wqe->wr.ex.imm_data;
348 				hwords += 1;
349 			}
350 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
351 				bth0 |= IB_BTH_SOLICITED;
352 			bth2 |= IB_BTH_REQ_ACK;
353 			if (++qp->s_cur == qp->s_size)
354 				qp->s_cur = 0;
355 			break;
356 
357 		case IB_WR_RDMA_WRITE:
358 			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
359 				qp->s_lsn++;
360 			/* FALLTHROUGH */
361 		case IB_WR_RDMA_WRITE_WITH_IMM:
362 			/* If no credit, return. */
363 			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
364 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
365 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
366 				goto bail;
367 			}
368 
369 			ohdr->u.rc.reth.vaddr =
370 				cpu_to_be64(wqe->rdma_wr.remote_addr);
371 			ohdr->u.rc.reth.rkey =
372 				cpu_to_be32(wqe->rdma_wr.rkey);
373 			ohdr->u.rc.reth.length = cpu_to_be32(len);
374 			hwords += sizeof(struct ib_reth) / sizeof(u32);
375 			if (len > pmtu) {
376 				qp->s_state = OP(RDMA_WRITE_FIRST);
377 				len = pmtu;
378 				break;
379 			}
380 			if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
381 				qp->s_state = OP(RDMA_WRITE_ONLY);
382 			else {
383 				qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
384 				/* Immediate data comes after RETH */
385 				ohdr->u.rc.imm_data =
386 					wqe->rdma_wr.wr.ex.imm_data;
387 				hwords += 1;
388 				if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
389 					bth0 |= IB_BTH_SOLICITED;
390 			}
391 			bth2 |= IB_BTH_REQ_ACK;
392 			if (++qp->s_cur == qp->s_size)
393 				qp->s_cur = 0;
394 			break;
395 
396 		case IB_WR_RDMA_READ:
397 			/*
398 			 * Don't allow more operations to be started
399 			 * than the QP limits allow.
400 			 */
401 			if (newreq) {
402 				if (qp->s_num_rd_atomic >=
403 				    qp->s_max_rd_atomic) {
404 					qp->s_flags |= RVT_S_WAIT_RDMAR;
405 					goto bail;
406 				}
407 				qp->s_num_rd_atomic++;
408 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
409 					qp->s_lsn++;
410 			}
411 
412 			ohdr->u.rc.reth.vaddr =
413 				cpu_to_be64(wqe->rdma_wr.remote_addr);
414 			ohdr->u.rc.reth.rkey =
415 				cpu_to_be32(wqe->rdma_wr.rkey);
416 			ohdr->u.rc.reth.length = cpu_to_be32(len);
417 			qp->s_state = OP(RDMA_READ_REQUEST);
418 			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
419 			ss = NULL;
420 			len = 0;
421 			bth2 |= IB_BTH_REQ_ACK;
422 			if (++qp->s_cur == qp->s_size)
423 				qp->s_cur = 0;
424 			break;
425 
426 		case IB_WR_ATOMIC_CMP_AND_SWP:
427 		case IB_WR_ATOMIC_FETCH_AND_ADD:
428 			/*
429 			 * Don't allow more operations to be started
430 			 * than the QP limits allow.
431 			 */
432 			if (newreq) {
433 				if (qp->s_num_rd_atomic >=
434 				    qp->s_max_rd_atomic) {
435 					qp->s_flags |= RVT_S_WAIT_RDMAR;
436 					goto bail;
437 				}
438 				qp->s_num_rd_atomic++;
439 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
440 					qp->s_lsn++;
441 			}
442 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
443 				qp->s_state = OP(COMPARE_SWAP);
444 				put_ib_ateth_swap(wqe->atomic_wr.swap,
445 						  &ohdr->u.atomic_eth);
446 				put_ib_ateth_compare(wqe->atomic_wr.compare_add,
447 						     &ohdr->u.atomic_eth);
448 			} else {
449 				qp->s_state = OP(FETCH_ADD);
450 				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
451 						  &ohdr->u.atomic_eth);
452 				put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
453 			}
454 			put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
455 					   &ohdr->u.atomic_eth);
456 			ohdr->u.atomic_eth.rkey = cpu_to_be32(
457 				wqe->atomic_wr.rkey);
458 			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
459 			ss = NULL;
460 			len = 0;
461 			bth2 |= IB_BTH_REQ_ACK;
462 			if (++qp->s_cur == qp->s_size)
463 				qp->s_cur = 0;
464 			break;
465 
466 		default:
467 			goto bail;
468 		}
469 		qp->s_sge.sge = wqe->sg_list[0];
470 		qp->s_sge.sg_list = wqe->sg_list + 1;
471 		qp->s_sge.num_sge = wqe->wr.num_sge;
472 		qp->s_sge.total_len = wqe->length;
473 		qp->s_len = wqe->length;
474 		if (newreq) {
475 			qp->s_tail++;
476 			if (qp->s_tail >= qp->s_size)
477 				qp->s_tail = 0;
478 		}
479 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
480 			qp->s_psn = wqe->lpsn + 1;
481 		else
482 			qp->s_psn++;
483 		break;
484 
485 	case OP(RDMA_READ_RESPONSE_FIRST):
486 		/*
487 		 * qp->s_state is normally set to the opcode of the
488 		 * last packet constructed for new requests and therefore
489 		 * is never set to RDMA read response.
490 		 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
491 		 * thread to indicate a SEND needs to be restarted from an
492 		 * earlier PSN without interfering with the sending thread.
493 		 * See qib_restart_rc().
494 		 */
495 		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
496 		/* FALLTHROUGH */
497 	case OP(SEND_FIRST):
498 		qp->s_state = OP(SEND_MIDDLE);
499 		/* FALLTHROUGH */
500 	case OP(SEND_MIDDLE):
501 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
502 		ss = &qp->s_sge;
503 		len = qp->s_len;
504 		if (len > pmtu) {
505 			len = pmtu;
506 			break;
507 		}
508 		if (wqe->wr.opcode == IB_WR_SEND)
509 			qp->s_state = OP(SEND_LAST);
510 		else {
511 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
512 			/* Immediate data comes after the BTH */
513 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
514 			hwords += 1;
515 		}
516 		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
517 			bth0 |= IB_BTH_SOLICITED;
518 		bth2 |= IB_BTH_REQ_ACK;
519 		qp->s_cur++;
520 		if (qp->s_cur >= qp->s_size)
521 			qp->s_cur = 0;
522 		break;
523 
524 	case OP(RDMA_READ_RESPONSE_LAST):
525 		/*
526 		 * qp->s_state is normally set to the opcode of the
527 		 * last packet constructed for new requests and therefore
528 		 * is never set to RDMA read response.
529 		 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
530 		 * thread to indicate a RDMA write needs to be restarted from
531 		 * an earlier PSN without interfering with the sending thread.
532 		 * See qib_restart_rc().
533 		 */
534 		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
535 		/* FALLTHROUGH */
536 	case OP(RDMA_WRITE_FIRST):
537 		qp->s_state = OP(RDMA_WRITE_MIDDLE);
538 		/* FALLTHROUGH */
539 	case OP(RDMA_WRITE_MIDDLE):
540 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
541 		ss = &qp->s_sge;
542 		len = qp->s_len;
543 		if (len > pmtu) {
544 			len = pmtu;
545 			break;
546 		}
547 		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
548 			qp->s_state = OP(RDMA_WRITE_LAST);
549 		else {
550 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
551 			/* Immediate data comes after the BTH */
552 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
553 			hwords += 1;
554 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
555 				bth0 |= IB_BTH_SOLICITED;
556 		}
557 		bth2 |= IB_BTH_REQ_ACK;
558 		qp->s_cur++;
559 		if (qp->s_cur >= qp->s_size)
560 			qp->s_cur = 0;
561 		break;
562 
563 	case OP(RDMA_READ_RESPONSE_MIDDLE):
564 		/*
565 		 * qp->s_state is normally set to the opcode of the
566 		 * last packet constructed for new requests and therefore
567 		 * is never set to RDMA read response.
568 		 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
569 		 * thread to indicate a RDMA read needs to be restarted from
570 		 * an earlier PSN without interfering with the sending thread.
571 		 * See qib_restart_rc().
572 		 */
573 		len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
574 		ohdr->u.rc.reth.vaddr =
575 			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
576 		ohdr->u.rc.reth.rkey =
577 			cpu_to_be32(wqe->rdma_wr.rkey);
578 		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
579 		qp->s_state = OP(RDMA_READ_REQUEST);
580 		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
581 		bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
582 		qp->s_psn = wqe->lpsn + 1;
583 		ss = NULL;
584 		len = 0;
585 		qp->s_cur++;
586 		if (qp->s_cur == qp->s_size)
587 			qp->s_cur = 0;
588 		break;
589 	}
590 	qp->s_sending_hpsn = bth2;
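	/*
	 * Within a multi-packet request, request an ACK every
	 * QIB_PSN_CREDIT packets so acknowledgements and credit updates
	 * arrive periodically instead of only at the end of the message.
	 */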
591 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
592 	if (delta && delta % QIB_PSN_CREDIT == 0)
593 		bth2 |= IB_BTH_REQ_ACK;
594 	if (qp->s_flags & RVT_S_SEND_ONE) {
595 		qp->s_flags &= ~RVT_S_SEND_ONE;
596 		qp->s_flags |= RVT_S_WAIT_ACK;
597 		bth2 |= IB_BTH_REQ_ACK;
598 	}
599 	qp->s_len -= len;
600 	qp->s_hdrwords = hwords;
601 	qp->s_cur_sge = ss;
602 	qp->s_cur_size = len;
603 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
604 done:
605 	return 1;
606 bail:
607 	qp->s_flags &= ~RVT_S_BUSY;
608 	return ret;
609 }
610 
611 /**
612  * qib_send_rc_ack - Construct an ACK packet and send it
613  * @qp: a pointer to the QP
614  *
615  * This is called from qib_rc_rcv() and qib_kreceive().
616  * Note that RDMA reads and atomics are handled in the
617  * send side QP state and tasklet.
618  */
619 void qib_send_rc_ack(struct rvt_qp *qp)
620 {
621 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
622 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
623 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
624 	u64 pbc;
625 	u16 lrh0;
626 	u32 bth0;
627 	u32 hwords;
628 	u32 pbufn;
629 	u32 __iomem *piobuf;
630 	struct ib_header hdr;
631 	struct ib_other_headers *ohdr;
632 	u32 control;
633 	unsigned long flags;
634 
635 	spin_lock_irqsave(&qp->s_lock, flags);
636 
637 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
638 		goto unlock;
639 
640 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
641 	if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
642 		goto queue_ack;
643 
644 	/* Construct the header with s_lock held so APM doesn't change it. */
645 	ohdr = &hdr.u.oth;
646 	lrh0 = QIB_LRH_BTH;
647 	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
648 	hwords = 6;
649 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
650 		hwords += qib_make_grh(ibp, &hdr.u.l.grh,
651 				       &qp->remote_ah_attr.grh, hwords, 0);
652 		ohdr = &hdr.u.l.oth;
653 		lrh0 = QIB_LRH_GRH;
654 	}
655 	/* read pkey_index w/o lock (it's atomic) */
656 	bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
657 	if (qp->s_mig_state == IB_MIG_MIGRATED)
658 		bth0 |= IB_BTH_MIG_REQ;
659 	if (qp->r_nak_state)
660 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
661 					    (qp->r_nak_state <<
662 					     QIB_AETH_CREDIT_SHIFT));
663 	else
664 		ohdr->u.aeth = qib_compute_aeth(qp);
665 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
666 		qp->remote_ah_attr.sl << 4;
667 	hdr.lrh[0] = cpu_to_be16(lrh0);
668 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
669 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
670 	hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
671 	ohdr->bth[0] = cpu_to_be32(bth0);
672 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
673 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
674 
675 	spin_unlock_irqrestore(&qp->s_lock, flags);
676 
677 	/* Don't try to send ACKs if the link isn't ACTIVE */
678 	if (!(ppd->lflags & QIBL_LINKACTIVE))
679 		goto done;
680 
681 	control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
682 				       qp->s_srate, lrh0 >> 12);
683 	/* length is + 1 for the control dword */
684 	pbc = ((u64) control << 32) | (hwords + 1);
685 
686 	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
687 	if (!piobuf) {
688 		/*
689 		 * We are out of PIO buffers at the moment.
690 		 * Pass responsibility for sending the ACK to the
691 		 * send tasklet so that when a PIO buffer becomes
692 		 * available, the ACK is sent ahead of other outgoing
693 		 * packets.
694 		 */
695 		spin_lock_irqsave(&qp->s_lock, flags);
696 		goto queue_ack;
697 	}
698 
699 	/*
700 	 * Write the pbc.
701 	 * We have to flush after the PBC for correctness on some
702 	 * CPUs, or the WC buffer can be written out of order.
703 	 */
704 	writeq(pbc, piobuf);
705 
706 	if (dd->flags & QIB_PIO_FLUSH_WC) {
707 		u32 *hdrp = (u32 *) &hdr;
708 
709 		qib_flush_wc();
710 		qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
711 		qib_flush_wc();
712 		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
713 	} else
714 		qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
715 
716 	if (dd->flags & QIB_USE_SPCL_TRIG) {
717 		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
718 
719 		qib_flush_wc();
720 		__raw_writel(0xaebecede, piobuf + spcl_off);
721 	}
722 
723 	qib_flush_wc();
724 	qib_sendbuf_done(dd, pbufn);
725 
726 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
727 	goto done;
728 
729 queue_ack:
730 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
731 		this_cpu_inc(*ibp->rvp.rc_qacks);
732 		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
733 		qp->s_nak_state = qp->r_nak_state;
734 		qp->s_ack_psn = qp->r_ack_psn;
735 
736 		/* Schedule the send tasklet. */
737 		qib_schedule_send(qp);
738 	}
739 unlock:
740 	spin_unlock_irqrestore(&qp->s_lock, flags);
741 done:
742 	return;
743 }
744 
745 /**
746  * reset_psn - reset the QP state to send starting from PSN
747  * @qp: the QP
748  * @psn: the packet sequence number to restart at
749  *
750  * This is called from do_rc_ack() and qib_restart_rc() to move the
751  * requester's send state back to the given PSN.
752  * Called at interrupt level with the QP s_lock held.
753  */
754 static void reset_psn(struct rvt_qp *qp, u32 psn)
755 {
756 	u32 n = qp->s_acked;
757 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
758 	u32 opcode;
759 
760 	qp->s_cur = n;
761 
762 	/*
763 	 * If we are starting the request from the beginning,
764 	 * let the normal send code handle initialization.
765 	 */
766 	if (qib_cmp24(psn, wqe->psn) <= 0) {
767 		qp->s_state = OP(SEND_LAST);
768 		goto done;
769 	}
770 
771 	/* Find the work request opcode corresponding to the given PSN. */
772 	opcode = wqe->wr.opcode;
773 	for (;;) {
774 		int diff;
775 
776 		if (++n == qp->s_size)
777 			n = 0;
778 		if (n == qp->s_tail)
779 			break;
780 		wqe = rvt_get_swqe_ptr(qp, n);
781 		diff = qib_cmp24(psn, wqe->psn);
782 		if (diff < 0)
783 			break;
784 		qp->s_cur = n;
785 		/*
786 		 * If we are starting the request from the beginning,
787 		 * let the normal send code handle initialization.
788 		 */
789 		if (diff == 0) {
790 			qp->s_state = OP(SEND_LAST);
791 			goto done;
792 		}
793 		opcode = wqe->wr.opcode;
794 	}
795 
796 	/*
797 	 * Set the state to restart in the middle of a request.
798 	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
799 	 * See qib_make_rc_req().
800 	 */
801 	switch (opcode) {
802 	case IB_WR_SEND:
803 	case IB_WR_SEND_WITH_IMM:
804 		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
805 		break;
806 
807 	case IB_WR_RDMA_WRITE:
808 	case IB_WR_RDMA_WRITE_WITH_IMM:
809 		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
810 		break;
811 
812 	case IB_WR_RDMA_READ:
813 		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
814 		break;
815 
816 	default:
817 		/*
818 		 * This case shouldn't happen since there is only
819 		 * one PSN per request.
820 		 */
821 		qp->s_state = OP(SEND_LAST);
822 	}
823 done:
824 	qp->s_psn = psn;
825 	/*
826 	 * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
827 	 * asynchronously before the send tasklet can get scheduled.
828 	 * Doing it in qib_make_rc_req() is too late.
829 	 */
830 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
831 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
832 		qp->s_flags |= RVT_S_WAIT_PSN;
833 }
834 
835 /*
836  * Back up requester to resend the last un-ACKed request.
837  * The QP r_lock and s_lock should be held and interrupts disabled.
838  */
839 static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
840 {
841 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
842 	struct qib_ibport *ibp;
843 
844 	if (qp->s_retry == 0) {
845 		if (qp->s_mig_state == IB_MIG_ARMED) {
846 			qib_migrate_qp(qp);
847 			qp->s_retry = qp->s_retry_cnt;
848 		} else if (qp->s_last == qp->s_acked) {
849 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
850 			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
851 			return;
852 		} else /* XXX need to handle delayed completion */
853 			return;
854 	} else
855 		qp->s_retry--;
856 
857 	ibp = to_iport(qp->ibqp.device, qp->port_num);
858 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
859 		ibp->rvp.n_rc_resends++;
860 	else
861 		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
862 
863 	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
864 			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
865 			 RVT_S_WAIT_ACK);
866 	if (wait)
867 		qp->s_flags |= RVT_S_SEND_ONE;
868 	reset_psn(qp, psn);
869 }
870 
871 /*
872  * This is called from s_timer for missing responses.
873  */
874 static void rc_timeout(unsigned long arg)
875 {
876 	struct rvt_qp *qp = (struct rvt_qp *)arg;
877 	struct qib_ibport *ibp;
878 	unsigned long flags;
879 
880 	spin_lock_irqsave(&qp->r_lock, flags);
881 	spin_lock(&qp->s_lock);
882 	if (qp->s_flags & RVT_S_TIMER) {
883 		ibp = to_iport(qp->ibqp.device, qp->port_num);
884 		ibp->rvp.n_rc_timeouts++;
885 		qp->s_flags &= ~RVT_S_TIMER;
886 		del_timer(&qp->s_timer);
887 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
888 		qib_schedule_send(qp);
889 	}
890 	spin_unlock(&qp->s_lock);
891 	spin_unlock_irqrestore(&qp->r_lock, flags);
892 }
893 
894 /*
895  * This is called from s_timer for RNR timeouts.
896  */
897 void qib_rc_rnr_retry(unsigned long arg)
898 {
899 	struct rvt_qp *qp = (struct rvt_qp *)arg;
900 	unsigned long flags;
901 
902 	spin_lock_irqsave(&qp->s_lock, flags);
903 	if (qp->s_flags & RVT_S_WAIT_RNR) {
904 		qp->s_flags &= ~RVT_S_WAIT_RNR;
905 		del_timer(&qp->s_timer);
906 		qib_schedule_send(qp);
907 	}
908 	spin_unlock_irqrestore(&qp->s_lock, flags);
909 }
910 
911 /*
912  * Set qp->s_sending_psn to the next PSN after the given one.
913  * This would be psn+1 except when RDMA reads are present.
914  */
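/*
 * An RDMA read request is a single packet on the wire but consumes a
 * PSN for each expected response packet, which is why the next sending
 * PSN after it is wqe->lpsn + 1 rather than psn + 1.
 */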
915 static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
916 {
917 	struct rvt_swqe *wqe;
918 	u32 n = qp->s_last;
919 
920 	/* Find the work request corresponding to the given PSN. */
921 	for (;;) {
922 		wqe = rvt_get_swqe_ptr(qp, n);
923 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
924 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
925 				qp->s_sending_psn = wqe->lpsn + 1;
926 			else
927 				qp->s_sending_psn = psn + 1;
928 			break;
929 		}
930 		if (++n == qp->s_size)
931 			n = 0;
932 		if (n == qp->s_tail)
933 			break;
934 	}
935 }
936 
937 /*
938  * This should be called with the QP s_lock held and interrupts disabled.
939  */
940 void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
941 {
942 	struct ib_other_headers *ohdr;
943 	struct rvt_swqe *wqe;
944 	struct ib_wc wc;
945 	unsigned i;
946 	u32 opcode;
947 	u32 psn;
948 
949 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
950 		return;
951 
952 	/* Find out where the BTH is */
953 	if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
954 		ohdr = &hdr->u.oth;
955 	else
956 		ohdr = &hdr->u.l.oth;
957 
958 	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
959 	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
960 	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
961 		WARN_ON(!qp->s_rdma_ack_cnt);
962 		qp->s_rdma_ack_cnt--;
963 		return;
964 	}
965 
966 	psn = be32_to_cpu(ohdr->bth[2]);
967 	reset_sending_psn(qp, psn);
968 
969 	/*
970 	 * Start timer after a packet requesting an ACK has been sent and
971 	 * there are still requests that haven't been acked.
972 	 */
973 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
974 	    !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
975 	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
976 		start_timer(qp);
977 
978 	while (qp->s_last != qp->s_acked) {
979 		u32 s_last;
980 
981 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
982 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
983 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
984 			break;
985 		s_last = qp->s_last;
986 		if (++s_last >= qp->s_size)
987 			s_last = 0;
988 		qp->s_last = s_last;
989 		/* see post_send() */
990 		barrier();
991 		for (i = 0; i < wqe->wr.num_sge; i++) {
992 			struct rvt_sge *sge = &wqe->sg_list[i];
993 
994 			rvt_put_mr(sge->mr);
995 		}
996 		/* Post a send completion queue entry if requested. */
997 		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
998 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
999 			memset(&wc, 0, sizeof(wc));
1000 			wc.wr_id = wqe->wr.wr_id;
1001 			wc.status = IB_WC_SUCCESS;
1002 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1003 			wc.byte_len = wqe->length;
1004 			wc.qp = &qp->ibqp;
1005 			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1006 		}
1007 	}
1008 	/*
1009 	 * If we were waiting for sends to complete before resending,
1010 	 * and they are now complete, restart sending.
1011 	 */
1012 	if (qp->s_flags & RVT_S_WAIT_PSN &&
1013 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1014 		qp->s_flags &= ~RVT_S_WAIT_PSN;
1015 		qp->s_sending_psn = qp->s_psn;
1016 		qp->s_sending_hpsn = qp->s_psn - 1;
1017 		qib_schedule_send(qp);
1018 	}
1019 }
1020 
1021 static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
1022 {
1023 	qp->s_last_psn = psn;
1024 }
1025 
1026 /*
1027  * Generate a SWQE completion.
1028  * This is similar to qib_send_complete but has to check to be sure
1029  * that the SGEs are not being referenced if the SWQE is being resent.
1030  */
1031 static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
1032 					 struct rvt_swqe *wqe,
1033 					 struct qib_ibport *ibp)
1034 {
1035 	struct ib_wc wc;
1036 	unsigned i;
1037 
1038 	/*
1039 	 * Don't decrement refcount and don't generate a
1040 	 * completion if the SWQE is being resent until the send
1041 	 * is finished.
1042 	 */
1043 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
1044 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1045 		u32 s_last;
1046 
1047 		for (i = 0; i < wqe->wr.num_sge; i++) {
1048 			struct rvt_sge *sge = &wqe->sg_list[i];
1049 
1050 			rvt_put_mr(sge->mr);
1051 		}
1052 		s_last = qp->s_last;
1053 		if (++s_last >= qp->s_size)
1054 			s_last = 0;
1055 		qp->s_last = s_last;
1056 		/* see post_send() */
1057 		barrier();
1058 		/* Post a send completion queue entry if requested. */
1059 		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
1060 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1061 			memset(&wc, 0, sizeof(wc));
1062 			wc.wr_id = wqe->wr.wr_id;
1063 			wc.status = IB_WC_SUCCESS;
1064 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1065 			wc.byte_len = wqe->length;
1066 			wc.qp = &qp->ibqp;
1067 			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1068 		}
1069 	} else
1070 		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
1071 
1072 	qp->s_retry = qp->s_retry_cnt;
1073 	update_last_psn(qp, wqe->lpsn);
1074 
1075 	/*
1076 	 * If we are completing a request which is in the process of
1077 	 * being resent, we can stop resending it since we know the
1078 	 * responder has already seen it.
1079 	 */
1080 	if (qp->s_acked == qp->s_cur) {
1081 		if (++qp->s_cur >= qp->s_size)
1082 			qp->s_cur = 0;
1083 		qp->s_acked = qp->s_cur;
1084 		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
1085 		if (qp->s_acked != qp->s_tail) {
1086 			qp->s_state = OP(SEND_LAST);
1087 			qp->s_psn = wqe->psn;
1088 		}
1089 	} else {
1090 		if (++qp->s_acked >= qp->s_size)
1091 			qp->s_acked = 0;
1092 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
1093 			qp->s_draining = 0;
1094 		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1095 	}
1096 	return wqe;
1097 }
1098 
1099 /**
1100  * do_rc_ack - process an incoming RC ACK
1101  * @qp: the QP the ACK came in on
1102  * @psn: the packet sequence number of the ACK
1103  * @opcode: the opcode of the request that resulted in the ACK
1104  *
1105  * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
1106  * for the given QP.
1107  * Called at interrupt level with the QP s_lock held.
1108  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
1109  */
1110 static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
1111 		     u64 val, struct qib_ctxtdata *rcd)
1112 {
1113 	struct qib_ibport *ibp;
1114 	enum ib_wc_status status;
1115 	struct rvt_swqe *wqe;
1116 	int ret = 0;
1117 	u32 ack_psn;
1118 	int diff;
1119 
1120 	/* Remove QP from retry timer */
1121 	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1122 		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1123 		del_timer(&qp->s_timer);
1124 	}
1125 
1126 	/*
1127 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
1128 	 * requests and implicitly NAK RDMA read and atomic requests issued
1129 	 * before the NAK'ed request.  The MSN won't include the NAK'ed
1130 	 * request but will include any previously ACK'ed requests.
1131 	 */
1132 	ack_psn = psn;
1133 	if (aeth >> 29)
1134 		ack_psn--;
1135 	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1136 	ibp = to_iport(qp->ibqp.device, qp->port_num);
1137 
1138 	/*
1139 	 * The MSN might be for a later WQE than the PSN indicates so
1140 	 * only complete WQEs that the PSN finishes.
1141 	 */
1142 	while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
1143 		/*
1144 		 * RDMA_READ_RESPONSE_ONLY is a special case since
1145 		 * we want to generate completion events for everything
1146 		 * before the RDMA read, copy the data, then generate
1147 		 * the completion for the read.
1148 		 */
1149 		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
1150 		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
1151 		    diff == 0) {
1152 			ret = 1;
1153 			goto bail;
1154 		}
1155 		/*
1156 		 * If this request is a RDMA read or atomic, and the ACK is
1157 		 * for a later operation, this ACK NAKs the RDMA read or
1158 		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
1159 		 * can ACK a RDMA read and likewise for atomic ops.  Note
1160 		 * that the NAK case can only happen if relaxed ordering is
1161 		 * used and requests are sent after an RDMA read or atomic
1162 		 * is sent but before the response is received.
1163 		 */
1164 		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
1165 		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
1166 		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1167 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
1168 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
1169 			/* Retry this request. */
1170 			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
1171 				qp->r_flags |= RVT_R_RDMAR_SEQ;
1172 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1173 				if (list_empty(&qp->rspwait)) {
1174 					qp->r_flags |= RVT_R_RSP_SEND;
1175 					rvt_get_qp(qp);
1176 					list_add_tail(&qp->rspwait,
1177 						      &rcd->qp_wait_list);
1178 				}
1179 			}
1180 			/*
1181 			 * No need to process the ACK/NAK since we are
1182 			 * restarting an earlier request.
1183 			 */
1184 			goto bail;
1185 		}
1186 		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1187 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
1188 			u64 *vaddr = wqe->sg_list[0].vaddr;
1189 			*vaddr = val;
1190 		}
1191 		if (qp->s_num_rd_atomic &&
1192 		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
1193 		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1194 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
1195 			qp->s_num_rd_atomic--;
1196 			/* Restart sending task if fence is complete */
1197 			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
1198 			    !qp->s_num_rd_atomic) {
1199 				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
1200 						 RVT_S_WAIT_ACK);
1201 				qib_schedule_send(qp);
1202 			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
1203 				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
1204 						 RVT_S_WAIT_ACK);
1205 				qib_schedule_send(qp);
1206 			}
1207 		}
1208 		wqe = do_rc_completion(qp, wqe, ibp);
1209 		if (qp->s_acked == qp->s_tail)
1210 			break;
1211 	}
1212 
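	/*
	 * AETH bits 31:29 select the response type: 0 is an ACK, 1 is an
	 * RNR NAK, 3 is a NAK and 2 is reserved.  Bits 28:24 carry the
	 * credit count, RNR timer index or NAK code, and bits 23:0 the MSN.
	 */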
1213 	switch (aeth >> 29) {
1214 	case 0:         /* ACK */
1215 		this_cpu_inc(*ibp->rvp.rc_acks);
1216 		if (qp->s_acked != qp->s_tail) {
1217 			/*
1218 			 * We are expecting more ACKs so
1219 			 * reset the retransmit timer.
1220 			 */
1221 			start_timer(qp);
1222 			/*
1223 			 * We can stop resending the earlier packets and
1224 			 * continue with the next packet the receiver wants.
1225 			 */
1226 			if (qib_cmp24(qp->s_psn, psn) <= 0)
1227 				reset_psn(qp, psn + 1);
1228 		} else if (qib_cmp24(qp->s_psn, psn) <= 0) {
1229 			qp->s_state = OP(SEND_LAST);
1230 			qp->s_psn = psn + 1;
1231 		}
1232 		if (qp->s_flags & RVT_S_WAIT_ACK) {
1233 			qp->s_flags &= ~RVT_S_WAIT_ACK;
1234 			qib_schedule_send(qp);
1235 		}
1236 		qib_get_credit(qp, aeth);
1237 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1238 		qp->s_retry = qp->s_retry_cnt;
1239 		update_last_psn(qp, psn);
1240 		ret = 1;
1241 		goto bail;
1242 
1243 	case 1:         /* RNR NAK */
1244 		ibp->rvp.n_rnr_naks++;
1245 		if (qp->s_acked == qp->s_tail)
1246 			goto bail;
1247 		if (qp->s_flags & RVT_S_WAIT_RNR)
1248 			goto bail;
1249 		if (qp->s_rnr_retry == 0) {
1250 			status = IB_WC_RNR_RETRY_EXC_ERR;
1251 			goto class_b;
1252 		}
1253 		if (qp->s_rnr_retry_cnt < 7)
1254 			qp->s_rnr_retry--;
1255 
1256 		/* The last valid PSN is the previous PSN. */
1257 		update_last_psn(qp, psn - 1);
1258 
1259 		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
1260 
1261 		reset_psn(qp, psn);
1262 
1263 		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
1264 		qp->s_flags |= RVT_S_WAIT_RNR;
1265 		qp->s_timer.function = qib_rc_rnr_retry;
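		/*
		 * The 5-bit RNR timer field of the AETH indexes
		 * ib_qib_rnr_table[], which gives the delay in usec to
		 * wait before retrying the request.
		 */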
1266 		qp->s_timer.expires = jiffies + usecs_to_jiffies(
1267 			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
1268 					   QIB_AETH_CREDIT_MASK]);
1269 		add_timer(&qp->s_timer);
1270 		goto bail;
1271 
1272 	case 3:         /* NAK */
1273 		if (qp->s_acked == qp->s_tail)
1274 			goto bail;
1275 		/* The last valid PSN is the previous PSN. */
1276 		update_last_psn(qp, psn - 1);
1277 		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
1278 			QIB_AETH_CREDIT_MASK) {
1279 		case 0: /* PSN sequence error */
1280 			ibp->rvp.n_seq_naks++;
1281 			/*
1282 			 * Back up to the responder's expected PSN.
1283 			 * Note that we might get a NAK in the middle of an
1284 			 * RDMA READ response which terminates the RDMA
1285 			 * READ.
1286 			 */
1287 			qib_restart_rc(qp, psn, 0);
1288 			qib_schedule_send(qp);
1289 			break;
1290 
1291 		case 1: /* Invalid Request */
1292 			status = IB_WC_REM_INV_REQ_ERR;
1293 			ibp->rvp.n_other_naks++;
1294 			goto class_b;
1295 
1296 		case 2: /* Remote Access Error */
1297 			status = IB_WC_REM_ACCESS_ERR;
1298 			ibp->rvp.n_other_naks++;
1299 			goto class_b;
1300 
1301 		case 3: /* Remote Operation Error */
1302 			status = IB_WC_REM_OP_ERR;
1303 			ibp->rvp.n_other_naks++;
1304 class_b:
1305 			if (qp->s_last == qp->s_acked) {
1306 				qib_send_complete(qp, wqe, status);
1307 				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1308 			}
1309 			break;
1310 
1311 		default:
1312 			/* Ignore other reserved NAK error codes */
1313 			goto reserved;
1314 		}
1315 		qp->s_retry = qp->s_retry_cnt;
1316 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1317 		goto bail;
1318 
1319 	default:                /* 2: reserved */
1320 reserved:
1321 		/* Ignore reserved NAK codes. */
1322 		goto bail;
1323 	}
1324 
1325 bail:
1326 	return ret;
1327 }
1328 
1329 /*
1330  * We have seen an out of sequence RDMA read middle or last packet.
1331  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1332  */
1333 static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
1334 			 struct qib_ctxtdata *rcd)
1335 {
1336 	struct rvt_swqe *wqe;
1337 
1338 	/* Remove QP from retry timer */
1339 	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1340 		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1341 		del_timer(&qp->s_timer);
1342 	}
1343 
1344 	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1345 
1346 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
1347 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1348 		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1349 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1350 			break;
1351 		wqe = do_rc_completion(qp, wqe, ibp);
1352 	}
1353 
1354 	ibp->rvp.n_rdma_seq++;
1355 	qp->r_flags |= RVT_R_RDMAR_SEQ;
1356 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1357 	if (list_empty(&qp->rspwait)) {
1358 		qp->r_flags |= RVT_R_RSP_SEND;
1359 		rvt_get_qp(qp);
1360 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1361 	}
1362 }
1363 
1364 /**
1365  * qib_rc_rcv_resp - process an incoming RC response packet
1366  * @ibp: the port this packet came in on
1367  * @ohdr: the other headers for this packet
1368  * @data: the packet data
1369  * @tlen: the packet length
1370  * @qp: the QP for this packet
1371  * @opcode: the opcode for this packet
1372  * @psn: the packet sequence number for this packet
1373  * @hdrsize: the header length
1374  * @pmtu: the path MTU
1375  *
1376  * This is called from qib_rc_rcv() to process an incoming RC response
1377  * packet for the given QP.
1378  * Called at interrupt level.
1379  */
1380 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
1381 			    struct ib_other_headers *ohdr,
1382 			    void *data, u32 tlen,
1383 			    struct rvt_qp *qp,
1384 			    u32 opcode,
1385 			    u32 psn, u32 hdrsize, u32 pmtu,
1386 			    struct qib_ctxtdata *rcd)
1387 {
1388 	struct rvt_swqe *wqe;
1389 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1390 	enum ib_wc_status status;
1391 	unsigned long flags;
1392 	int diff;
1393 	u32 pad;
1394 	u32 aeth;
1395 	u64 val;
1396 
1397 	if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
1398 		/*
1399 		 * If ACK'd PSN on SDMA busy list try to make progress to
1400 		 * reclaim SDMA credits.
1401 		 */
1402 		if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
1403 		    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
1404 
1405 			/*
1406 			 * If send tasklet not running attempt to progress
1407 			 * SDMA queue.
1408 			 */
1409 			if (!(qp->s_flags & RVT_S_BUSY)) {
1410 				/* Acquire SDMA Lock */
1411 				spin_lock_irqsave(&ppd->sdma_lock, flags);
1412 				/* Invoke sdma make progress */
1413 				qib_sdma_make_progress(ppd);
1414 				/* Release SDMA Lock */
1415 				spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1416 			}
1417 		}
1418 	}
1419 
1420 	spin_lock_irqsave(&qp->s_lock, flags);
1421 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
1422 		goto ack_done;
1423 
1424 	/* Ignore invalid responses. */
1425 	smp_read_barrier_depends(); /* see post_one_send */
1426 	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
1427 		goto ack_done;
1428 
1429 	/* Ignore duplicate responses. */
1430 	diff = qib_cmp24(psn, qp->s_last_psn);
1431 	if (unlikely(diff <= 0)) {
1432 		/* Update credits for "ghost" ACKs */
1433 		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1434 			aeth = be32_to_cpu(ohdr->u.aeth);
1435 			if ((aeth >> 29) == 0)
1436 				qib_get_credit(qp, aeth);
1437 		}
1438 		goto ack_done;
1439 	}
1440 
1441 	/*
1442 	 * Skip everything other than the PSN we expect, if we are waiting
1443 	 * for a reply to a restarted RDMA read or atomic op.
1444 	 */
1445 	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
1446 		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
1447 			goto ack_done;
1448 		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
1449 	}
1450 
1451 	if (unlikely(qp->s_acked == qp->s_tail))
1452 		goto ack_done;
1453 	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1454 	status = IB_WC_SUCCESS;
1455 
1456 	switch (opcode) {
1457 	case OP(ACKNOWLEDGE):
1458 	case OP(ATOMIC_ACKNOWLEDGE):
1459 	case OP(RDMA_READ_RESPONSE_FIRST):
1460 		aeth = be32_to_cpu(ohdr->u.aeth);
1461 		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1462 			val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
1463 		else
1464 			val = 0;
1465 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1466 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
1467 			goto ack_done;
1468 		hdrsize += 4;
1469 		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1470 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1471 			goto ack_op_err;
1472 		/*
1473 		 * If this is a response to a resent RDMA read, we
1474 		 * have to be careful to copy the data to the right
1475 		 * location.
1476 		 */
1477 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1478 						  wqe, psn, pmtu);
1479 		goto read_middle;
1480 
1481 	case OP(RDMA_READ_RESPONSE_MIDDLE):
1482 		/* no AETH, no ACK */
1483 		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1484 			goto ack_seq_err;
1485 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1486 			goto ack_op_err;
1487 read_middle:
1488 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
1489 			goto ack_len_err;
1490 		if (unlikely(pmtu >= qp->s_rdma_read_len))
1491 			goto ack_len_err;
1492 
1493 		/*
1494 		 * We got a response so update the timeout.
1495 		 * 4.096 usec. * (1 << qp->timeout)
1496 		 */
1497 		qp->s_flags |= RVT_S_TIMER;
1498 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
1499 		if (qp->s_flags & RVT_S_WAIT_ACK) {
1500 			qp->s_flags &= ~RVT_S_WAIT_ACK;
1501 			qib_schedule_send(qp);
1502 		}
1503 
1504 		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1505 			qp->s_retry = qp->s_retry_cnt;
1506 
1507 		/*
1508 		 * Update the RDMA receive state but do the copy w/o
1509 		 * holding the locks and blocking interrupts.
1510 		 */
1511 		qp->s_rdma_read_len -= pmtu;
1512 		update_last_psn(qp, psn);
1513 		spin_unlock_irqrestore(&qp->s_lock, flags);
1514 		qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
1515 		goto bail;
1516 
1517 	case OP(RDMA_READ_RESPONSE_ONLY):
1518 		aeth = be32_to_cpu(ohdr->u.aeth);
1519 		if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1520 			goto ack_done;
1521 		/* Get the number of bytes the message was padded by. */
1522 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1523 		/*
1524 		 * Check that the data size is >= 0 && <= pmtu.
1525 		 * Remember to account for the AETH header (4) and
1526 		 * ICRC (4).
1527 		 */
1528 		if (unlikely(tlen < (hdrsize + pad + 8)))
1529 			goto ack_len_err;
1530 		/*
1531 		 * If this is a response to a resent RDMA read, we
1532 		 * have to be careful to copy the data to the right
1533 		 * location.
1534 		 */
1535 		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1536 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1537 						  wqe, psn, pmtu);
1538 		goto read_last;
1539 
1540 	case OP(RDMA_READ_RESPONSE_LAST):
1541 		/* ACKs READ req. */
1542 		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1543 			goto ack_seq_err;
1544 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1545 			goto ack_op_err;
1546 		/* Get the number of bytes the message was padded by. */
1547 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1548 		/*
1549 		 * Check that the data size is >= 1 && <= pmtu.
1550 		 * Remember to account for the AETH header (4) and
1551 		 * ICRC (4).
1552 		 */
1553 		if (unlikely(tlen <= (hdrsize + pad + 8)))
1554 			goto ack_len_err;
1555 read_last:
1556 		tlen -= hdrsize + pad + 8;
1557 		if (unlikely(tlen != qp->s_rdma_read_len))
1558 			goto ack_len_err;
1559 		aeth = be32_to_cpu(ohdr->u.aeth);
1560 		qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
1561 		WARN_ON(qp->s_rdma_read_sge.num_sge);
1562 		(void) do_rc_ack(qp, aeth, psn,
1563 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1564 		goto ack_done;
1565 	}
1566 
1567 ack_op_err:
1568 	status = IB_WC_LOC_QP_OP_ERR;
1569 	goto ack_err;
1570 
1571 ack_seq_err:
1572 	rdma_seq_err(qp, ibp, psn, rcd);
1573 	goto ack_done;
1574 
1575 ack_len_err:
1576 	status = IB_WC_LOC_LEN_ERR;
1577 ack_err:
1578 	if (qp->s_last == qp->s_acked) {
1579 		qib_send_complete(qp, wqe, status);
1580 		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1581 	}
1582 ack_done:
1583 	spin_unlock_irqrestore(&qp->s_lock, flags);
1584 bail:
1585 	return;
1586 }
1587 
1588 /**
1589  * qib_rc_rcv_error - process an incoming duplicate or error RC packet
1590  * @ohdr: the other headers for this packet
1591  * @data: the packet data
1592  * @qp: the QP for this packet
1593  * @opcode: the opcode for this packet
1594  * @psn: the packet sequence number for this packet
1595  * @diff: the difference between the PSN and the expected PSN
1596  *
1597  * This is called from qib_rc_rcv() to process an unexpected
1598  * incoming RC packet for the given QP.
1599  * Called at interrupt level.
1600  * Return 1 if no more processing is needed; otherwise return 0 to
1601  * schedule a response to be sent.
1602  */
1603 static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
1604 			    void *data,
1605 			    struct rvt_qp *qp,
1606 			    u32 opcode,
1607 			    u32 psn,
1608 			    int diff,
1609 			    struct qib_ctxtdata *rcd)
1610 {
1611 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1612 	struct rvt_ack_entry *e;
1613 	unsigned long flags;
1614 	u8 i, prev;
1615 	int old_req;
1616 
1617 	if (diff > 0) {
1618 		/*
1619 		 * Packet sequence error.
1620 		 * A NAK will ACK earlier sends and RDMA writes.
1621 		 * Don't queue the NAK if we already sent one.
1622 		 */
1623 		if (!qp->r_nak_state) {
1624 			ibp->rvp.n_rc_seqnak++;
1625 			qp->r_nak_state = IB_NAK_PSN_ERROR;
1626 			/* Use the expected PSN. */
1627 			qp->r_ack_psn = qp->r_psn;
1628 			/*
1629 			 * Wait to send the sequence NAK until all packets
1630 			 * in the receive queue have been processed.
1631 			 * Otherwise, we end up propagating congestion.
1632 			 */
1633 			if (list_empty(&qp->rspwait)) {
1634 				qp->r_flags |= RVT_R_RSP_NAK;
1635 				rvt_get_qp(qp);
1636 				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1637 			}
1638 		}
1639 		goto done;
1640 	}
1641 
1642 	/*
1643 	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
1644 	 * write or atomic op.  Don't NAK errors, just silently drop
1645 	 * the duplicate request.  Note that r_sge, r_len, and
1646 	 * r_rcv_len may be in use so don't modify them.
1647 	 *
1648 	 * We are supposed to ACK the earliest duplicate PSN but we
1649 	 * can coalesce an outstanding duplicate ACK.  We have to
1650 	 * send the earliest so that RDMA reads can be restarted at
1651 	 * the requester's expected PSN.
1652 	 *
1653 	 * First, find where this duplicate PSN falls within the
1654 	 * ACKs previously sent.
1655 	 * old_req is true if there is an older response that is scheduled
1656 	 * to be sent before sending this one.
1657 	 */
1658 	e = NULL;
1659 	old_req = 1;
1660 	ibp->rvp.n_rc_dupreq++;
1661 
1662 	spin_lock_irqsave(&qp->s_lock, flags);
1663 
1664 	for (i = qp->r_head_ack_queue; ; i = prev) {
1665 		if (i == qp->s_tail_ack_queue)
1666 			old_req = 0;
1667 		if (i)
1668 			prev = i - 1;
1669 		else
1670 			prev = QIB_MAX_RDMA_ATOMIC;
1671 		if (prev == qp->r_head_ack_queue) {
1672 			e = NULL;
1673 			break;
1674 		}
1675 		e = &qp->s_ack_queue[prev];
1676 		if (!e->opcode) {
1677 			e = NULL;
1678 			break;
1679 		}
1680 		if (qib_cmp24(psn, e->psn) >= 0) {
1681 			if (prev == qp->s_tail_ack_queue &&
1682 			    qib_cmp24(psn, e->lpsn) <= 0)
1683 				old_req = 0;
1684 			break;
1685 		}
1686 	}
1687 	switch (opcode) {
1688 	case OP(RDMA_READ_REQUEST): {
1689 		struct ib_reth *reth;
1690 		u32 offset;
1691 		u32 len;
1692 
1693 		/*
1694 		 * If we didn't find the RDMA read request in the ack queue,
1695 		 * we can ignore this request.
1696 		 */
1697 		if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1698 			goto unlock_done;
1699 		/* RETH comes after BTH */
1700 		reth = &ohdr->u.rc.reth;
1701 		/*
1702 		 * Address range must be a subset of the original
1703 		 * request and start on pmtu boundaries.
1704 		 * We reuse the old ack_queue slot since the requester
1705 		 * should not back up and request an earlier PSN for the
1706 		 * same request.
1707 		 */
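		/*
		 * offset is how far (in bytes) the duplicate PSN lies
		 * within the original request: whole packets times pmtu.
		 */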
1708 		offset = ((psn - e->psn) & QIB_PSN_MASK) *
1709 			qp->pmtu;
1710 		len = be32_to_cpu(reth->length);
1711 		if (unlikely(offset + len != e->rdma_sge.sge_length))
1712 			goto unlock_done;
1713 		if (e->rdma_sge.mr) {
1714 			rvt_put_mr(e->rdma_sge.mr);
1715 			e->rdma_sge.mr = NULL;
1716 		}
1717 		if (len != 0) {
1718 			u32 rkey = be32_to_cpu(reth->rkey);
1719 			u64 vaddr = be64_to_cpu(reth->vaddr);
1720 			int ok;
1721 
1722 			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1723 					 IB_ACCESS_REMOTE_READ);
1724 			if (unlikely(!ok))
1725 				goto unlock_done;
1726 		} else {
1727 			e->rdma_sge.vaddr = NULL;
1728 			e->rdma_sge.length = 0;
1729 			e->rdma_sge.sge_length = 0;
1730 		}
1731 		e->psn = psn;
1732 		if (old_req)
1733 			goto unlock_done;
1734 		qp->s_tail_ack_queue = prev;
1735 		break;
1736 	}
1737 
1738 	case OP(COMPARE_SWAP):
1739 	case OP(FETCH_ADD): {
1740 		/*
1741 		 * If we didn't find the atomic request in the ack queue
1742 		 * or the send tasklet is already backed up to send an
1743 		 * earlier entry, we can ignore this request.
1744 		 */
1745 		if (!e || e->opcode != (u8) opcode || old_req)
1746 			goto unlock_done;
1747 		qp->s_tail_ack_queue = prev;
1748 		break;
1749 	}
1750 
1751 	default:
1752 		/*
1753 		 * Ignore this operation if it doesn't request an ACK
1754 		 * or an earlier RDMA read or atomic is going to be resent.
1755 		 */
1756 		if (!(psn & IB_BTH_REQ_ACK) || old_req)
1757 			goto unlock_done;
1758 		/*
1759 		 * Resend the most recent ACK if this request is
1760 		 * after all the previous RDMA reads and atomics.
1761 		 */
1762 		if (i == qp->r_head_ack_queue) {
1763 			spin_unlock_irqrestore(&qp->s_lock, flags);
1764 			qp->r_nak_state = 0;
1765 			qp->r_ack_psn = qp->r_psn - 1;
1766 			goto send_ack;
1767 		}
1768 		/*
1769 		 * Try to send a simple ACK to work around a Mellanox bug
1770 		 * which doesn't accept a RDMA read response or atomic
1771 		 * response as an ACK for earlier SENDs or RDMA writes.
1772 		 */
1773 		if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
1774 			spin_unlock_irqrestore(&qp->s_lock, flags);
1775 			qp->r_nak_state = 0;
1776 			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1777 			goto send_ack;
1778 		}
1779 		/*
1780 		 * Resend the RDMA read or atomic op which
1781 		 * ACKs this duplicate request.
1782 		 */
1783 		qp->s_tail_ack_queue = i;
1784 		break;
1785 	}
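	/*
	 * Re-arm the responder: the entry now at s_tail_ack_queue will be
	 * (re)sent by the send engine.
	 */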
1786 	qp->s_ack_state = OP(ACKNOWLEDGE);
1787 	qp->s_flags |= RVT_S_RESP_PENDING;
1788 	qp->r_nak_state = 0;
1789 	qib_schedule_send(qp);
1790 
1791 unlock_done:
1792 	spin_unlock_irqrestore(&qp->s_lock, flags);
1793 done:
1794 	return 1;
1795 
1796 send_ack:
1797 	return 0;
1798 }
1799 
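/**
 * qib_rc_error - put the QP into the error state
 * @qp: the QP
 * @err: the completion error status
 *
 * Hands the QP to rvt_error_qp() and, if the last WQE has been reached,
 * generates an IB_EVENT_QP_LAST_WQE_REACHED event for the consumer.
 */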
1800 void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1801 {
1802 	unsigned long flags;
1803 	int lastwqe;
1804 
1805 	spin_lock_irqsave(&qp->s_lock, flags);
1806 	lastwqe = rvt_error_qp(qp, err);
1807 	spin_unlock_irqrestore(&qp->s_lock, flags);
1808 
1809 	if (lastwqe) {
1810 		struct ib_event ev;
1811 
1812 		ev.device = qp->ibqp.device;
1813 		ev.element.qp = &qp->ibqp;
1814 		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1815 		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1816 	}
1817 }
1818 
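/*
 * Advance s_tail_ack_queue past entry n (wrapping at the end of
 * s_ack_queue) and reset s_ack_state so the next response starts fresh.
 */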
1819 static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
1820 {
1821 	unsigned next;
1822 
1823 	next = n + 1;
1824 	if (next > QIB_MAX_RDMA_ATOMIC)
1825 		next = 0;
1826 	qp->s_tail_ack_queue = next;
1827 	qp->s_ack_state = OP(ACKNOWLEDGE);
1828 }
1829 
1830 /**
1831  * qib_rc_rcv - process an incoming RC packet
1832  * @rcd: the context pointer
1833  * @hdr: the header of this packet
1834  * @has_grh: true if the header has a GRH
1835  * @data: the packet data
1836  * @tlen: the packet length
1837  * @qp: the QP for this packet
1838  *
1839  * This is called from qib_qp_rcv() to process an incoming RC packet
1840  * for the given QP.
1841  * Called at interrupt level.
1842  */
1843 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
1844 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
1845 {
1846 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
1847 	struct ib_other_headers *ohdr;
1848 	u32 opcode;
1849 	u32 hdrsize;
1850 	u32 psn;
1851 	u32 pad;
1852 	struct ib_wc wc;
1853 	u32 pmtu = qp->pmtu;
1854 	int diff;
1855 	struct ib_reth *reth;
1856 	unsigned long flags;
1857 	int ret;
1858 
1859 	/* Check for GRH */
1860 	if (!has_grh) {
1861 		ohdr = &hdr->u.oth;
1862 		hdrsize = 8 + 12;       /* LRH + BTH */
1863 	} else {
1864 		ohdr = &hdr->u.l.oth;
1865 		hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
1866 	}
1867 
1868 	opcode = be32_to_cpu(ohdr->bth[0]);
1869 	if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1870 		return;
1871 
1872 	psn = be32_to_cpu(ohdr->bth[2]);
1873 	opcode >>= 24;
1874 
1875 	/*
1876 	 * Process responses (ACKs) before anything else.  Note that the
1877 	 * packet sequence number will be for something in the send work
1878 	 * queue rather than the expected receive packet sequence number.
1879 	 * In other words, this QP is the requester.
1880 	 */
1881 	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1882 	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1883 		qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
1884 				hdrsize, pmtu, rcd);
1885 		return;
1886 	}
1887 
1888 	/* Compute 24 bits worth of difference. */
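	/*
	 * qib_cmp24() does a signed comparison of the 24-bit PSNs:
	 * diff < 0 means this is a duplicate of something already
	 * received, diff > 0 means one or more packets were lost.
	 * Both cases are handled by qib_rc_rcv_error().
	 */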
1889 	diff = qib_cmp24(psn, qp->r_psn);
1890 	if (unlikely(diff)) {
1891 		if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
1892 			return;
1893 		goto send_ack;
1894 	}
1895 
1896 	/* Check for opcode sequence errors. */
1897 	switch (qp->r_state) {
1898 	case OP(SEND_FIRST):
1899 	case OP(SEND_MIDDLE):
1900 		if (opcode == OP(SEND_MIDDLE) ||
1901 		    opcode == OP(SEND_LAST) ||
1902 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1903 			break;
1904 		goto nack_inv;
1905 
1906 	case OP(RDMA_WRITE_FIRST):
1907 	case OP(RDMA_WRITE_MIDDLE):
1908 		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1909 		    opcode == OP(RDMA_WRITE_LAST) ||
1910 		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1911 			break;
1912 		goto nack_inv;
1913 
1914 	default:
1915 		if (opcode == OP(SEND_MIDDLE) ||
1916 		    opcode == OP(SEND_LAST) ||
1917 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1918 		    opcode == OP(RDMA_WRITE_MIDDLE) ||
1919 		    opcode == OP(RDMA_WRITE_LAST) ||
1920 		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1921 			goto nack_inv;
1922 		/*
1923 		 * Note that it is up to the requester to not send a new
1924 		 * RDMA read or atomic operation before receiving an ACK
1925 		 * for the previous operation.
1926 		 */
1927 		break;
1928 	}
1929 
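	/*
	 * The first request received while the QP is still in RTR tells
	 * us the connection is established; let the consumer know via
	 * the IB_EVENT_COMM_EST async event.
	 */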
1930 	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
1931 		qp->r_flags |= RVT_R_COMM_EST;
1932 		if (qp->ibqp.event_handler) {
1933 			struct ib_event ev;
1934 
1935 			ev.device = qp->ibqp.device;
1936 			ev.element.qp = &qp->ibqp;
1937 			ev.event = IB_EVENT_COMM_EST;
1938 			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1939 		}
1940 	}
1941 
1942 	/* OK, process the packet. */
1943 	switch (opcode) {
1944 	case OP(SEND_FIRST):
1945 		ret = qib_get_rwqe(qp, 0);
1946 		if (ret < 0)
1947 			goto nack_op_err;
1948 		if (!ret)
1949 			goto rnr_nak;
1950 		qp->r_rcv_len = 0;
1951 		/* FALLTHROUGH */
1952 	case OP(SEND_MIDDLE):
1953 	case OP(RDMA_WRITE_MIDDLE):
1954 send_middle:
1955 		/* Check for invalid length (exactly one PMTU) and RWQE overrun. */
1956 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
1957 			goto nack_inv;
1958 		qp->r_rcv_len += pmtu;
1959 		if (unlikely(qp->r_rcv_len > qp->r_len))
1960 			goto nack_inv;
1961 		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
1962 		break;
1963 
1964 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1965 		/* consume RWQE */
1966 		ret = qib_get_rwqe(qp, 1);
1967 		if (ret < 0)
1968 			goto nack_op_err;
1969 		if (!ret)
1970 			goto rnr_nak;
1971 		goto send_last_imm;
1972 
1973 	case OP(SEND_ONLY):
1974 	case OP(SEND_ONLY_WITH_IMMEDIATE):
1975 		ret = qib_get_rwqe(qp, 0);
1976 		if (ret < 0)
1977 			goto nack_op_err;
1978 		if (!ret)
1979 			goto rnr_nak;
1980 		qp->r_rcv_len = 0;
1981 		if (opcode == OP(SEND_ONLY))
1982 			goto no_immediate_data;
1983 		/* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
1984 	case OP(SEND_LAST_WITH_IMMEDIATE):
1985 send_last_imm:
1986 		wc.ex.imm_data = ohdr->u.imm_data;
1987 		hdrsize += 4;
1988 		wc.wc_flags = IB_WC_WITH_IMM;
1989 		goto send_last;
1990 	case OP(SEND_LAST):
1991 	case OP(RDMA_WRITE_LAST):
1992 no_immediate_data:
1993 		wc.wc_flags = 0;
1994 		wc.ex.imm_data = 0;
1995 send_last:
1996 		/* Get the number of bytes the message was padded by. */
1997 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1998 		/* Check for invalid length. */
1999 		/* XXX LAST len should be >= 1 */
2000 		if (unlikely(tlen < (hdrsize + pad + 4)))
2001 			goto nack_inv;
2002 		/* Don't count the CRC. */
2003 		tlen -= (hdrsize + pad + 4);
2004 		wc.byte_len = tlen + qp->r_rcv_len;
2005 		if (unlikely(wc.byte_len > qp->r_len))
2006 			goto nack_inv;
2007 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
2008 		rvt_put_ss(&qp->r_sge);
2009 		qp->r_msn++;
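		/*
		 * Only generate a receive completion if an RWQE was
		 * consumed; plain RDMA writes do not use one.
		 */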
2010 		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
2011 			break;
2012 		wc.wr_id = qp->r_wr_id;
2013 		wc.status = IB_WC_SUCCESS;
2014 		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2015 		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2016 			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2017 		else
2018 			wc.opcode = IB_WC_RECV;
2019 		wc.qp = &qp->ibqp;
2020 		wc.src_qp = qp->remote_qpn;
2021 		wc.slid = qp->remote_ah_attr.dlid;
2022 		wc.sl = qp->remote_ah_attr.sl;
2023 		/* zero fields that are N/A */
2024 		wc.vendor_err = 0;
2025 		wc.pkey_index = 0;
2026 		wc.dlid_path_bits = 0;
2027 		wc.port_num = 0;
2028 		/* Signal completion event if the solicited bit is set. */
2029 		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
2030 			     (ohdr->bth[0] &
2031 			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
2032 		break;
2033 
2034 	case OP(RDMA_WRITE_FIRST):
2035 	case OP(RDMA_WRITE_ONLY):
2036 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2037 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2038 			goto nack_inv;
2039 		/* RETH comes after BTH */
2040 		reth = &ohdr->u.rc.reth;
2041 		hdrsize += sizeof(*reth);
2042 		qp->r_len = be32_to_cpu(reth->length);
2043 		qp->r_rcv_len = 0;
2044 		qp->r_sge.sg_list = NULL;
2045 		if (qp->r_len != 0) {
2046 			u32 rkey = be32_to_cpu(reth->rkey);
2047 			u64 vaddr = be64_to_cpu(reth->vaddr);
2048 			int ok;
2049 
2050 			/* Check rkey & NAK */
2051 			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2052 					 rkey, IB_ACCESS_REMOTE_WRITE);
2053 			if (unlikely(!ok))
2054 				goto nack_acc;
2055 			qp->r_sge.num_sge = 1;
2056 		} else {
2057 			qp->r_sge.num_sge = 0;
2058 			qp->r_sge.sge.mr = NULL;
2059 			qp->r_sge.sge.vaddr = NULL;
2060 			qp->r_sge.sge.length = 0;
2061 			qp->r_sge.sge.sge_length = 0;
2062 		}
2063 		if (opcode == OP(RDMA_WRITE_FIRST))
2064 			goto send_middle;
2065 		else if (opcode == OP(RDMA_WRITE_ONLY))
2066 			goto no_immediate_data;
2067 		ret = qib_get_rwqe(qp, 1);
2068 		if (ret < 0)
2069 			goto nack_op_err;
2070 		if (!ret)
2071 			goto rnr_nak;
2072 		wc.ex.imm_data = ohdr->u.rc.imm_data;
2073 		hdrsize += 4;
2074 		wc.wc_flags = IB_WC_WITH_IMM;
2075 		goto send_last;
2076 
2077 	case OP(RDMA_READ_REQUEST): {
2078 		struct rvt_ack_entry *e;
2079 		u32 len;
2080 		u8 next;
2081 
2082 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2083 			goto nack_inv;
2084 		next = qp->r_head_ack_queue + 1;
2085 		/* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
2086 		if (next > QIB_MAX_RDMA_ATOMIC)
2087 			next = 0;
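		/*
		 * If the queue would wrap onto the tail, the requester has
		 * more RDMA read/atomic requests outstanding than we
		 * advertised.  That is only tolerated if the entry at the
		 * tail has already been sent, in which case its slot can be
		 * recycled; otherwise NAK the request as invalid.
		 */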
2088 		spin_lock_irqsave(&qp->s_lock, flags);
2089 		if (unlikely(next == qp->s_tail_ack_queue)) {
2090 			if (!qp->s_ack_queue[next].sent)
2091 				goto nack_inv_unlck;
2092 			qib_update_ack_queue(qp, next);
2093 		}
2094 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
2095 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2096 			rvt_put_mr(e->rdma_sge.mr);
2097 			e->rdma_sge.mr = NULL;
2098 		}
2099 		reth = &ohdr->u.rc.reth;
2100 		len = be32_to_cpu(reth->length);
2101 		if (len) {
2102 			u32 rkey = be32_to_cpu(reth->rkey);
2103 			u64 vaddr = be64_to_cpu(reth->vaddr);
2104 			int ok;
2105 
2106 			/* Check rkey & NAK */
2107 			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2108 					 rkey, IB_ACCESS_REMOTE_READ);
2109 			if (unlikely(!ok))
2110 				goto nack_acc_unlck;
2111 			/*
2112 			 * Update the next expected PSN.  We add 1 below,
2113 			 * so only add the remainder here.
2114 			 */
2115 			if (len > pmtu)
2116 				qp->r_psn += (len - 1) / pmtu;
2117 		} else {
2118 			e->rdma_sge.mr = NULL;
2119 			e->rdma_sge.vaddr = NULL;
2120 			e->rdma_sge.length = 0;
2121 			e->rdma_sge.sge_length = 0;
2122 		}
2123 		e->opcode = opcode;
2124 		e->sent = 0;
2125 		e->psn = psn;
2126 		e->lpsn = qp->r_psn;
2127 		/*
2128 		 * We need to increment the MSN here instead of when we
2129 		 * finish sending the result since a duplicate request would
2130 		 * increment it more than once.
2131 		 */
2132 		qp->r_msn++;
2133 		qp->r_psn++;
2134 		qp->r_state = opcode;
2135 		qp->r_nak_state = 0;
2136 		qp->r_head_ack_queue = next;
2137 
2138 		/* Schedule the send tasklet. */
2139 		qp->s_flags |= RVT_S_RESP_PENDING;
2140 		qib_schedule_send(qp);
2141 
2142 		goto sunlock;
2143 	}
2144 
2145 	case OP(COMPARE_SWAP):
2146 	case OP(FETCH_ADD): {
2147 		struct ib_atomic_eth *ateth;
2148 		struct rvt_ack_entry *e;
2149 		u64 vaddr;
2150 		atomic64_t *maddr;
2151 		u64 sdata;
2152 		u32 rkey;
2153 		u8 next;
2154 
2155 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2156 			goto nack_inv;
2157 		next = qp->r_head_ack_queue + 1;
2158 		if (next > QIB_MAX_RDMA_ATOMIC)
2159 			next = 0;
2160 		spin_lock_irqsave(&qp->s_lock, flags);
2161 		if (unlikely(next == qp->s_tail_ack_queue)) {
2162 			if (!qp->s_ack_queue[next].sent)
2163 				goto nack_inv_unlck;
2164 			qib_update_ack_queue(qp, next);
2165 		}
2166 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
2167 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2168 			rvt_put_mr(e->rdma_sge.mr);
2169 			e->rdma_sge.mr = NULL;
2170 		}
2171 		ateth = &ohdr->u.atomic_eth;
2172 		vaddr = get_ib_ateth_vaddr(ateth);
2173 		if (unlikely(vaddr & (sizeof(u64) - 1)))
2174 			goto nack_inv_unlck;
2175 		rkey = be32_to_cpu(ateth->rkey);
2176 		/* Check rkey & NAK */
2177 		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2178 					  vaddr, rkey,
2179 					  IB_ACCESS_REMOTE_ATOMIC)))
2180 			goto nack_acc_unlck;
2181 		/* Perform atomic OP and save result. */
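		/*
		 * atomic64_add_return() yields the post-add value, so
		 * subtract sdata to recover the original contents;
		 * cmpxchg() already returns the prior contents.  The
		 * original value is kept in the ack entry and returned
		 * in the ATOMIC ACKNOWLEDGE response.
		 */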
2182 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
2183 		sdata = get_ib_ateth_swap(ateth);
2184 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2185 			(u64) atomic64_add_return(sdata, maddr) - sdata :
2186 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2187 				      get_ib_ateth_compare(ateth),
2188 				      sdata);
2189 		rvt_put_mr(qp->r_sge.sge.mr);
2190 		qp->r_sge.num_sge = 0;
2191 		e->opcode = opcode;
2192 		e->sent = 0;
2193 		e->psn = psn;
2194 		e->lpsn = psn;
2195 		qp->r_msn++;
2196 		qp->r_psn++;
2197 		qp->r_state = opcode;
2198 		qp->r_nak_state = 0;
2199 		qp->r_head_ack_queue = next;
2200 
2201 		/* Schedule the send tasklet. */
2202 		qp->s_flags |= RVT_S_RESP_PENDING;
2203 		qib_schedule_send(qp);
2204 
2205 		goto sunlock;
2206 	}
2207 
2208 	default:
2209 		/* NAK unknown opcodes. */
2210 		goto nack_inv;
2211 	}
2212 	qp->r_psn++;
2213 	qp->r_state = opcode;
2214 	qp->r_ack_psn = psn;
2215 	qp->r_nak_state = 0;
2216 	/* Send an ACK if requested or required. */
2217 	if (psn & IB_BTH_REQ_ACK)
2218 		goto send_ack;
2219 	return;
2220 
2221 rnr_nak:
2222 	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
2223 	qp->r_ack_psn = qp->r_psn;
2224 	/* Queue RNR NAK for later */
2225 	if (list_empty(&qp->rspwait)) {
2226 		qp->r_flags |= RVT_R_RSP_NAK;
2227 		rvt_get_qp(qp);
2228 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2229 	}
2230 	return;
2231 
2232 nack_op_err:
2233 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2234 	qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2235 	qp->r_ack_psn = qp->r_psn;
2236 	/* Queue NAK for later */
2237 	if (list_empty(&qp->rspwait)) {
2238 		qp->r_flags |= RVT_R_RSP_NAK;
2239 		rvt_get_qp(qp);
2240 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2241 	}
2242 	return;
2243 
2244 nack_inv_unlck:
2245 	spin_unlock_irqrestore(&qp->s_lock, flags);
2246 nack_inv:
2247 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2248 	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2249 	qp->r_ack_psn = qp->r_psn;
2250 	/* Queue NAK for later */
2251 	if (list_empty(&qp->rspwait)) {
2252 		qp->r_flags |= RVT_R_RSP_NAK;
2253 		rvt_get_qp(qp);
2254 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2255 	}
2256 	return;
2257 
2258 nack_acc_unlck:
2259 	spin_unlock_irqrestore(&qp->s_lock, flags);
2260 nack_acc:
2261 	qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
2262 	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2263 	qp->r_ack_psn = qp->r_psn;
2264 send_ack:
2265 	qib_send_rc_ack(qp);
2266 	return;
2267 
2268 sunlock:
2269 	spin_unlock_irqrestore(&qp->s_lock, flags);
2270 }
2271