/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/spinlock.h>

#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"

/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

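		/*
		 * Drop the MR reference that rvt_lkey_ok() took for each
		 * SGE that was validated before the failure.
		 */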
		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct rvt_rq *rq;
	struct rvt_rwq *wq;
	struct rvt_srq *srq;
	struct rvt_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (qp->ibqp.srq) {
		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	if (unlikely(tail == wq->head)) {
		ret = 0;
		goto unlock;
	}
	/* Make sure entry is read after head index is read. */
	smp_rmb();
	wqe = rvt_get_rwqe_ptr(rq, tail);
	/*
	 * Even though we update the tail index in memory, the verbs
	 * consumer is not supposed to post more entries until a
	 * completion is generated.
	 */
	if (++tail >= rq->size)
		tail = 0;
	wq->tail = tail;
	if (!wr_id_only && !init_sge(qp, wqe)) {
		ret = -1;
		goto unlock;
	}
	qp->r_wr_id = wqe->wr_id;

	ret = 1;
	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * Validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
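 *
 * Return 0 if the packet's header matches the QP's current or
 * alternate path state, otherwise 1.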
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
		       int has_grh, struct rvt_qp *qp, u32 bth0)
{
	__be64 guid;
	unsigned long flags;
	u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];

	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
		if (!has_grh) {
			if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			    IB_AH_GRH)
				goto err;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			      IB_AH_GRH))
				goto err;
			grh = rdma_ah_read_grh(&qp->alt_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
				&hdr->u.l.grh.sgid,
				grh->dgid.global.subnet_prefix,
				grh->dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
					    ib_get_slid(hdr)))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       ib_get_sl(hdr),
				       0, qp->ibqp.qp_num,
				       ib_get_slid(hdr),
				       ib_get_dlid(hdr));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if (ib_get_slid(hdr) !=
			rdma_ah_get_dlid(&qp->alt_ah_attr) ||
		    ppd_from_ibp(ibp)->port !=
			rdma_ah_get_port_num(&qp->alt_ah_attr))
			goto err;
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!has_grh) {
			if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			    IB_AH_GRH)
				goto err;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			      IB_AH_GRH))
				goto err;
			grh = rdma_ah_read_grh(&qp->remote_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
				&hdr->u.l.grh.sgid,
				grh->dgid.global.subnet_prefix,
				grh->dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
					    ib_get_slid(hdr)))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       ib_get_sl(hdr),
				       0, qp->ibqp.qp_num,
				       ib_get_slid(hdr),
				       ib_get_dlid(hdr));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if (ib_get_slid(hdr) !=
			rdma_ah_get_dlid(&qp->remote_ah_attr) ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			goto err;
		if (qp->s_mig_state == IB_MIG_REARM &&
		    !(bth0 & IB_BTH_MIG_REQ))
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;

err:
	return 1;
}

/**
 * ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from hfi1_do_send() to
 * forward a WQE addressed to the same HFI.
 * Note that although we are single threaded due to the send engine, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
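 *
 * Loopback requests never touch the hardware send path; the payload is
 * copied directly between the sending and receiving QPs' buffers.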
 */
static void ruc_loopback(struct rvt_qp *sqp)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct rvt_qp *qp;
	struct rvt_swqe *wqe;
	struct rvt_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;
	bool release;
	int ret;
	bool copy_last = false;
	int local_ops = 0;

	rcu_read_lock();

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= RVT_S_BUSY;

again:
	smp_read_barrier_depends(); /* see post_one_send() */
	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
		goto clr_busy;
	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
		ibp->rvp.n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
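		 * For UC, a dropped packet would simply be lost on the
		 * wire, so report success.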
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof(wc));
	send_status = IB_WC_SUCCESS;

	release = true;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_REG_MR:
		goto send_comp;

	case IB_WR_LOCAL_INV:
		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
			if (rvt_invalidate_rkey(sqp,
						wqe->wr.ex.invalidate_rkey))
				send_status = IB_WC_LOC_PROT_ERR;
			local_ops = 1;
		}
		goto send_comp;

	case IB_WR_SEND_WITH_INV:
		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
			wc.wc_flags = IB_WC_WITH_INVALIDATE;
			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
		}
		goto send;

	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
send:
		ret = hfi1_rvt_get_rwqe(qp, 0);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		ret = hfi1_rvt_get_rwqe(qp, 1);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		/* skip copy_last set and qp_access_flags recheck */
		goto do_write;
	case IB_WR_RDMA_WRITE:
		copy_last = rvt_is_user_qp(qp);
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
do_write:
		if (wqe->length == 0)
			break;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		qp->r_sge.sg_list = NULL;
		qp->r_sge.num_sge = 1;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		release = false;
		sqp->s_sge.sg_list = NULL;
		sqp->s_sge.num_sge = 1;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
					  wqe->atomic_wr.remote_addr,
					  wqe->atomic_wr.rkey,
					  IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
		sdata = wqe->atomic_wr.compare_add;
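		/*
		 * Both operations return the original contents of the
		 * remote address to the requester: add_return gives the
		 * new value, so subtract sdata back out; cmpxchg returns
		 * the old value whether or not the swap occurred.
		 */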
		*(u64 *)sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64)atomic64_add_return(sdata, maddr) - sdata :
			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
				     sdata, wqe->atomic_wr.swap);
		rvt_put_mr(qp->r_sge.sge.mr);
		qp->r_sge.num_sge = 0;
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (!release)
				rvt_put_mr(sge->mr);
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}
	if (release)
		rvt_put_ss(&qp->r_sge);

	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ibp->rvp.n_loop_pkts++;
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	hfi1_send_complete(sqp, wqe, send_status);
	if (local_ops) {
		atomic_dec(&sqp->local_ops_pending);
		local_ops = 0;
	}
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	ibp->rvp.n_rnr_naks++;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
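	 * A retry count of 7 means "retry forever", so s_rnr_retry is
	 * only decremented when the configured count is less than 7.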
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
		goto clr_busy;
	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
				IB_AETH_CREDIT_SHIFT);
	goto clr_busy;

op_err:
	send_status = IB_WC_REM_OP_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	rvt_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	hfi1_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~RVT_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	rcu_read_unlock();
}

/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	hdr->sgid.global.interface_id =
		grh->sgid_index < HFI1_GUIDS_PER_PORT ?
		get_sguid(ibp, grh->sgid_index) :
		get_sguid(ibp, HFI1_PORT_GUID_INDEX);
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}

#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)

/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles the AHG feature by allocating an AHG entry and
 * arranging for the header of the first middle packet to be copied.
 *
 * Subsequent middle packets reuse the copied entry and update only the
 * PSN, using one or two AHG edits.
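 *
 * The low 16 bits of the PSN are always edited; a second edit is added
 * only when the upper 16 bits of the PSN change.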
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
		clear_ahg(qp);
	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
		/* first middle that needs copy */
		if (qp->s_ahgidx < 0)
			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
		if (qp->s_ahgidx >= 0) {
			qp->s_ahgpsn = npsn;
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
			/* save to protect a change in another thread */
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			qp->s_flags |= RVT_S_AHG_VALID;
		}
	} else {
		/* subsequent middle after valid */
		if (qp->s_ahgidx >= 0) {
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			priv->s_ahg->ahgcount++;
			priv->s_ahg->ahgdesc[0] =
				sdma_build_ahg_descriptor(
					(__force u16)cpu_to_be16((u16)npsn),
					BTH2_OFFSET,
					16,
					16);
			if ((npsn & 0xffff0000) !=
					(qp->s_ahgpsn & 0xffff0000)) {
				priv->s_ahg->ahgcount++;
				priv->s_ahg->ahgdesc[1] =
					sdma_build_ahg_descriptor(
						(__force u16)cpu_to_be16(
							(u16)(npsn >> 16)),
						BTH2_OFFSET,
						0,
						16);
			}
		}
	}
}

void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;
	u32 bth1;

	/* Construct the header. */
	extra_bytes = -ps->s_txreq->s_cur_size & 3;
	nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
	lrh0 = HFI1_LRH_BTH;
	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		qp->s_hdrwords +=
			hfi1_make_grh(ibp,
				      &ps->s_txreq->phdr.hdr.u.l.grh,
				      rdma_ah_read_grh(&qp->remote_ah_attr),
				      qp->s_hdrwords, nwords);
		lrh0 = HFI1_LRH_GRH;
		middle = 0;
	}
	lrh0 |= (priv->s_sc & 0xf) << 12 |
		(rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
	/*
	 * reset s_ahg/AHG fields
	 *
	 * This ensures that the ahgentry/ahgcount
	 * are at a non-AHG default to protect
	 * build_verbs_tx_desc() from using
	 * a stale ahgidx.
	 *
	 * build_ahg() will modify as appropriate
	 * to use the AHG feature.
	 */
	priv->s_ahg->tx_flags = 0;
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	else
		middle = 0;
	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~RVT_S_AHG_VALID;
	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
	ps->s_txreq->phdr.hdr.lrh[1] =
		cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
	ps->s_txreq->phdr.hdr.lrh[2] =
		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	ps->s_txreq->phdr.hdr.lrh[3] =
		cpu_to_be16(ppd_from_ibp(ibp)->lid |
			    rdma_ah_get_path_bits(&qp->remote_ah_attr));
	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	bth1 = qp->remote_qpn;
	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
	}
	ohdr->bth[1] = cpu_to_be32(bth1);
	ohdr->bth[2] = cpu_to_be32(bth2);
}

/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */

/**
 * schedule_send_yield - test for a yield required for QP send engine
 * @qp: a pointer to QP
 * @ps: a pointer to a structure with commonly used lookup values for
 *      the send engine progress
 *
 * This routine checks if the time slice for the QP has expired for
 * RC QPs; if so, an additional work entry is queued.  At this point,
 * other QPs have an opportunity to be scheduled.  It returns true if
 * a yield is required, otherwise false is returned.
 */
static bool schedule_send_yield(struct rvt_qp *qp,
				struct hfi1_pkt_state *ps)
{
	if (unlikely(time_after(jiffies, ps->timeout))) {
		if (!ps->in_thread ||
		    workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
			spin_lock_irqsave(&qp->s_lock, ps->flags);
			qp->s_flags &= ~RVT_S_BUSY;
			hfi1_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, ps->flags);
			this_cpu_inc(*ps->ppd->dd->send_schedule);
			trace_hfi1_rc_expired_time_slice(qp, true);
			return true;
		}

		cond_resched();
		this_cpu_inc(*ps->ppd->dd->send_schedule);
		ps->timeout = jiffies + ps->timeout_int;
	}

	trace_hfi1_rc_expired_time_slice(qp, false);
	return false;
}

void hfi1_do_send_from_rvt(struct rvt_qp *qp)
{
	hfi1_do_send(qp, false);
}

void _hfi1_do_send(struct work_struct *work)
{
	struct iowait *wait = container_of(work, struct iowait, iowork);
	struct rvt_qp *qp = iowait_to_qp(wait);

	hfi1_do_send(qp, true);
}

/**
 * hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
 * @in_thread: true if in a workqueue thread
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
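 *
 * Exclusion is provided by the RVT_S_BUSY flag, which is set under
 * s_lock before the send loop begins.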
 */
void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;
	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);
	ps.in_thread = in_thread;

	trace_hfi1_rc_do_send(qp, in_thread);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_rc_req;
		ps.timeout_int = qp->timeout_jiffies;
		break;
	case IB_QPT_UC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_uc_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
		break;
	default:
		make_req = hfi1_make_ud_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
	}

	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request. */
	if (!hfi1_send_ok(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	ps.timeout_int = ps.timeout_int / 8;
	ps.timeout = jiffies + ps.timeout_int;
	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
			cpumask_first(cpumask_of_node(ps.ppd->dd->node));

	/* ensure a pre-built packet is handled */
	ps.s_txreq = get_waiting_verbs_txreq(qp);
	do {
		/* Check for a constructed packet to be sent. */
		if (qp->s_hdrwords != 0) {
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;
			/* Record that s_ahg is empty. */
			qp->s_hdrwords = 0;
			/* allow other tasks to run */
			if (schedule_send_yield(qp, &ps))
				return;

			spin_lock_irqsave(&qp->s_lock, ps.flags);
		}
	} while (make_req(qp, &ps));

	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

/*
 * This should be called with s_lock held.
 */
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status)
{
	u32 old_last, last;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		return;

	last = qp->s_last;
	old_last = last;
	trace_hfi1_qp_send_completion(qp, wqe, last);
	if (++last >= qp->s_size)
		last = 0;
	trace_hfi1_qp_send_completion(qp, wqe, last);
	qp->s_last = last;
	/* See post_send() */
	barrier();
	rvt_put_swqe(wqe);
	if (qp->ibqp.qp_type == IB_QPT_UD ||
	    qp->ibqp.qp_type == IB_QPT_SMI ||
	    qp->ibqp.qp_type == IB_QPT_GSI)
		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

	rvt_qp_swqe_complete(qp,
			     wqe,
			     ib_hfi1_wc_opcode[wqe->wr.opcode],
			     status);

	if (qp->s_acked == old_last)
		qp->s_acked = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}