/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
                              struct rvt_qpn_map *map, unsigned off)
{
        return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
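
/*
 * The table below follows the AETH credit-count encoding: each code is
 * roughly 1.4x the previous one, so the credit count doubles every two
 * codes and the 31 codes span 0 through 32768 RWQEs.
 */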

/*
 * Convert the AETH credit code into the number of credits.
 */
static const u16 credit_table[31] = {
        0,                      /* 0 */
        1,                      /* 1 */
        2,                      /* 2 */
        3,                      /* 3 */
        4,                      /* 4 */
        6,                      /* 5 */
        8,                      /* 6 */
        12,                     /* 7 */
        16,                     /* 8 */
        24,                     /* 9 */
        32,                     /* A */
        48,                     /* B */
        64,                     /* C */
        96,                     /* D */
        128,                    /* E */
        192,                    /* F */
        256,                    /* 10 */
        384,                    /* 11 */
        512,                    /* 12 */
        768,                    /* 13 */
        1024,                   /* 14 */
        1536,                   /* 15 */
        2048,                   /* 16 */
        3072,                   /* 17 */
        4096,                   /* 18 */
        6144,                   /* 19 */
        8192,                   /* 1A */
        12288,                  /* 1B */
        16384,                  /* 1C */
        24576,                  /* 1D */
        32768                   /* 1E */
};

static void flush_tx_list(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        while (!list_empty(&priv->s_iowait.tx_head)) {
                struct sdma_txreq *tx;

                tx = list_first_entry(
                        &priv->s_iowait.tx_head,
                        struct sdma_txreq,
                        list);
                list_del_init(&tx->list);
                hfi1_put_txreq(
                        container_of(tx, struct verbs_txreq, txreq));
        }
}

static void flush_iowait(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;

        write_seqlock_irqsave(&dev->iowait_lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}

static inline int opa_mtu_enum_to_int(int mtu)
{
        switch (mtu) {
        case OPA_MTU_8192:  return 8192;
        case OPA_MTU_10240: return 10240;
        default:            return -1;
        }
}
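
/*
 * OPA defines MTU enum values above the largest IB enum (IB_MTU_4096).
 * opa_mtu_enum_to_int() above handles only the OPA-specific encodings
 * and returns -1 for anything else, so that verbs_mtu_enum_to_int()
 * can fall back to the standard ib_mtu_enum_to_int() conversion.
 */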

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen". Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
        int val;

        /* Constraining 10KB packets to 8KB packets */
        if (mtu == (enum ib_mtu)OPA_MTU_10240)
                mtu = OPA_MTU_8192;
        val = opa_mtu_enum_to_int((int)mtu);
        if (val > 0)
                return val;
        return ib_mtu_enum_to_int(mtu);
}

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                         int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_ibdev *dev = to_idev(ibqp->device);
        struct hfi1_devdata *dd = dd_from_dev(dev);
        u8 sc;

        if (attr_mask & IB_QP_AV) {
                sc = ah_to_sc(ibqp->device, &attr->ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        return 0;
}

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;

        if (attr_mask & IB_QP_AV) {
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE &&
            attr->path_mig_state == IB_MIG_MIGRATED &&
            qp->s_mig_state == IB_MIG_ARMED) {
                qp->s_flags |= RVT_S_AHG_CLEAR;
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
        }
}

/**
 * hfi1_check_send_wqe - validate wqe
 * @qp: The qp
 * @wqe: The built wqe
 *
 * Validate the wqe. This is called prior to inserting the wqe into
 * the ring, but after the wqe has been set up.
 *
 * Returns 0 on success, -EINVAL on failure
 */
int hfi1_check_send_wqe(struct rvt_qp *qp,
                        struct rvt_swqe *wqe)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;

        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
                break;
        case IB_QPT_SMI:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                break;
        case IB_QPT_GSI:
        case IB_QPT_UD:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
                        return -EINVAL;
        default:
                break;
        }
        return wqe->length <= piothreshold;
}
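
/*
 * AETH handling: the bits covered by HFI1_MSN_MASK carry the message
 * sequence number (MSN), and the field at HFI1_AETH_CREDIT_SHIFT
 * (masked by HFI1_AETH_CREDIT_MASK) carries the credit code used to
 * index credit_table[] above.
 */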

/**
 * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 hfi1_compute_aeth(struct rvt_qp *qp)
{
        u32 aeth = qp->r_msn & HFI1_MSN_MASK;

        if (qp->ibqp.srq) {
                /*
                 * Shared receive queues don't generate credits.
                 * Set the credit field to the invalid value.
                 */
                aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
        } else {
                u32 min, max, x;
                u32 credits;
                struct rvt_rwq *wq = qp->r_rq.wq;
                u32 head;
                u32 tail;

                /* sanity check pointers before trusting them */
                head = wq->head;
                if (head >= qp->r_rq.size)
                        head = 0;
                tail = wq->tail;
                if (tail >= qp->r_rq.size)
                        tail = 0;
                /*
                 * Compute the number of credits available (RWQEs).
                 * There is a small chance that the pair of reads is
                 * not atomic, which is OK, since the fuzziness is
                 * resolved as further ACKs go out.
                 */
                credits = head - tail;
                if ((int)credits < 0)
                        credits += qp->r_rq.size;
                /*
                 * Binary search the credit table to find the code to
                 * use.
                 */
                min = 0;
                max = 31;
                for (;;) {
                        x = (min + max) / 2;
                        if (credit_table[x] == credits)
                                break;
                        if (credit_table[x] > credits) {
                                max = x;
                        } else {
                                if (min == x)
                                        break;
                                min = x;
                        }
                }
                aeth |= x << HFI1_AETH_CREDIT_SHIFT;
        }
        return cpu_to_be32(aeth);
}

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
void _hfi1_schedule_send(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
                        priv->s_sde ?
                        priv->s_sde->cpu :
                        cpumask_first(cpumask_of_node(dd->node)));
}

static void qp_pio_drain(struct rvt_qp *qp)
{
        struct hfi1_ibdev *dev;
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->s_sendcontext)
                return;
        dev = to_idev(qp->ibqp.device);
        while (iowait_pio_pending(&priv->s_iowait)) {
                write_seqlock_irq(&dev->iowait_lock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
                write_sequnlock_irq(&dev->iowait_lock);
                iowait_pio_drain(&priv->s_iowait);
                write_seqlock_irq(&dev->iowait_lock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
                write_sequnlock_irq(&dev->iowait_lock);
        }
}

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 */
void hfi1_schedule_send(struct rvt_qp *qp)
{
        if (hfi1_send_ok(qp))
                _hfi1_schedule_send(qp);
}
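
/*
 * Credit handling: the credit code from an incoming AETH is expanded
 * through credit_table[] and added to the AETH MSN to form the new
 * limit sequence number (LSN).  A sender stalled on
 * RVT_S_WAIT_SSN_CREDIT is rescheduled whenever the limit advances or
 * credits become unlimited.
 */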

/**
 * hfi1_get_credit - flush the send work queue of a QP
 * @qp: the qp whose send work queue to flush
 * @aeth: the Acknowledge Extended Transport Header
 *
 * The QP s_lock should be held.
 */
void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
        u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;

        /*
         * If the credit is invalid, we can send
         * as many packets as we like. Otherwise, we have to
         * honor the credit field.
         */
        if (credit == HFI1_AETH_CREDIT_INVAL) {
                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
                        qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
                        if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
                /* Compute new LSN (i.e., MSN + credit) */
                credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
                if (cmp_msn(credit, qp->s_lsn) > 0) {
                        qp->s_lsn = credit;
                        if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        }
}

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & flag) {
                qp->s_flags &= ~flag;
                trace_hfi1_qpwakeup(qp, flag);
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        /* Notify hfi1_destroy_qp() if it is waiting. */
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
}

static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq)
{
        struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
        struct rvt_qp *qp;
        struct hfi1_qp_priv *priv;
        unsigned long flags;
        int ret = 0;
        struct hfi1_ibdev *dev;

        qp = tx->qp;
        priv = qp->priv;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                /*
                 * If we couldn't queue the DMA request, save the info
                 * and try again later rather than destroying the
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
                dev = &sde->dd->verbs_dev;
                list_add_tail(&stx->list, &wait->tx_head);
                write_seqlock(&dev->iowait_lock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&priv->s_iowait.list)) {
                        struct hfi1_ibport *ibp =
                                to_iport(qp->ibqp.device, qp->port_num);

                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        list_add_tail(&priv->s_iowait.list, &sde->dmawait);
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        atomic_inc(&qp->refcount);
                }
                write_sequnlock(&dev->iowait_lock);
                qp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
        } else {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                hfi1_put_txreq(tx);
        }
        return ret;
eagain:
        write_sequnlock(&dev->iowait_lock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
        struct rvt_qp *qp = iowait_to_qp(wait);

        WARN_ON(reason != SDMA_AVAIL_REASON);
        hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}
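
/*
 * iowait_sleep() and iowait_wakeup() above form a pair: when the SDMA
 * engine runs out of descriptors, the QP is parked on the engine's
 * dmawait list with RVT_S_WAIT_DMA_DESC set and a reference held; when
 * descriptors free up, hfi1_qp_wakeup() clears the flag, reschedules
 * the send engine and drops that reference.
 */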

static void iowait_sdma_drained(struct iowait *wait)
{
        struct rvt_qp *qp = iowait_to_qp(wait);
        unsigned long flags;

        /*
         * This happens when the send engine notes
         * a QP in the error state and cannot
         * do the flush work until that QP's
         * sdma work has finished.
         */
        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & RVT_S_WAIT_DMA) {
                qp->s_flags &= ~RVT_S_WAIT_DMA;
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct sdma_engine *sde;

        if (!(dd->flags & HFI1_HAS_SEND_DMA))
                return NULL;
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                return NULL;
        default:
                break;
        }
        sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
        return sde;
}

/*
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                /* SMA packets to VL15 */
                return dd->vld[15].sc;
        default:
                break;
        }

        return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
                                          sc5);
}

struct qp_iter {
        struct hfi1_ibdev *dev;
        struct rvt_qp *qp;
        int specials;
        int n;
};

struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
        struct qp_iter *iter;

        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter)
                return NULL;

        iter->dev = dev;
        iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
        if (qp_iter_next(iter)) {
                kfree(iter);
                return NULL;
        }

        return iter;
}
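
/*
 * qp_iter_next() walks the per-port special QPs (QP0/QP1) first and
 * then the QP hash table; because the table is RCU protected, it must
 * be called from within an RCU read-side critical section.  Together
 * with qp_iter_init() and qp_iter_print() it backs the debugfs QP
 * statistics dump.
 */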

int qp_iter_next(struct qp_iter *iter)
{
        struct hfi1_ibdev *dev = iter->dev;
        int n = iter->n;
        int ret = 1;
        struct rvt_qp *pqp = iter->qp;
        struct rvt_qp *qp;

        /*
         * The approach is to consider the special qps
         * as additional table entries before the
         * real hash table. Since the qp code sets
         * the qp->next hash link to NULL, this works just fine.
         *
         * iter->specials is 2 * # ports
         *
         * n = 0..iter->specials are the special qp indices
         *
         * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
         * the potential hash bucket entries
         *
         */
        for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
                if (pqp) {
                        qp = rcu_dereference(pqp->next);
                } else {
                        if (n < iter->specials) {
                                struct hfi1_pportdata *ppd;
                                struct hfi1_ibport *ibp;
                                int pidx;

                                pidx = n % dev->rdi.ibdev.phys_port_cnt;
                                ppd = &dd_from_dev(dev)->pport[pidx];
                                ibp = &ppd->ibport_data;

                                if (!(n & 1))
                                        qp = rcu_dereference(ibp->rvp.qp[0]);
                                else
                                        qp = rcu_dereference(ibp->rvp.qp[1]);
                        } else {
                                qp = rcu_dereference(
                                        dev->rdi.qp_dev->qp_table[
                                                (n - iter->specials)]);
                        }
                }
                pqp = qp;
                if (qp) {
                        iter->qp = qp;
                        iter->n = n;
                        return 0;
                }
        }
        return ret;
}

static const char * const qp_type_str[] = {
        "SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
        return
                qp->s_last == qp->s_acked &&
                qp->s_acked == qp->s_cur &&
                qp->s_cur == qp->s_tail &&
                qp->s_tail == qp->s_head;
}
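
/*
 * qp_iter_print() emits one line per QP for the statistics dump.  The
 * leading "I"/"B" indicates whether the send queue is idle (all of the
 * s_last/s_acked/s_cur/s_tail/s_head indices are equal, per qp_idle())
 * or busy.
 */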

void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
        struct rvt_swqe *wqe;
        struct rvt_qp *qp = iter->qp;
        struct hfi1_qp_priv *priv = qp->priv;
        struct sdma_engine *sde;
        struct send_context *send_context;

        sde = qp_to_sdma_engine(qp, priv->s_sc);
        wqe = rvt_get_swqe_ptr(qp, qp->s_last);
        send_context = qp_to_send_context(qp, priv->s_sc);
        seq_printf(s,
                   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
                   iter->n,
                   qp_idle(qp) ? "I" : "B",
                   qp->ibqp.qp_num,
                   atomic_read(&qp->refcount),
                   qp_type_str[qp->ibqp.qp_type],
                   qp->state,
                   wqe ? wqe->wr.opcode : 0,
                   qp->s_hdrwords,
                   qp->s_flags,
                   iowait_sdma_pending(&priv->s_iowait),
                   iowait_pio_pending(&priv->s_iowait),
                   !list_empty(&priv->s_iowait.list),
                   qp->timeout,
                   wqe ? wqe->ssn : 0,
                   qp->s_lsn,
                   qp->s_last_psn,
                   qp->s_psn, qp->s_next_psn,
                   qp->s_sending_psn, qp->s_sending_hpsn,
                   qp->s_last, qp->s_acked, qp->s_cur,
                   qp->s_tail, qp->s_head, qp->s_size,
                   qp->s_avail,
                   qp->remote_qpn,
                   qp->remote_ah_attr.dlid,
                   qp->remote_ah_attr.sl,
                   qp->pmtu,
                   qp->s_retry,
                   qp->s_retry_cnt,
                   qp->s_rnr_retry_cnt,
                   sde,
                   sde ? sde->this_idx : 0,
                   send_context,
                   send_context ? send_context->sw_index : 0,
                   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
                   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
                   qp->pid);
}

void qp_comm_est(struct rvt_qp *qp)
{
        qp->r_flags |= RVT_R_COMM_EST;
        if (qp->ibqp.event_handler) {
                struct ib_event ev;

                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_COMM_EST;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
}

void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                    gfp_t gfp)
{
        struct hfi1_qp_priv *priv;

        priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
        if (!priv)
                return ERR_PTR(-ENOMEM);

        priv->owner = qp;

        priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
        if (!priv->s_hdr) {
                kfree(priv);
                return ERR_PTR(-ENOMEM);
        }
        setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
        qp->s_timer.function = hfi1_rc_timeout;
        return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        kfree(priv->s_hdr);
        kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        int n;
        unsigned qp_inuse = 0;

        for (n = 0; n < dd->num_pports; n++) {
                struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

                rcu_read_lock();
                if (rcu_dereference(ibp->rvp.qp[0]))
                        qp_inuse++;
                if (rcu_dereference(ibp->rvp.qp[1]))
                        qp_inuse++;
                rcu_read_unlock();
        }

        return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
        flush_iowait(qp);
        hfi1_stop_rc_timers(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        cancel_work_sync(&priv->s_iowait.iowork);
        hfi1_del_timers_sync(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_sdma_drain(&priv->s_iowait);
        qp_pio_drain(qp);
        flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_init(
                &priv->s_iowait,
                1,
                _hfi1_do_send,
                iowait_sleep,
                iowait_wakeup,
                iowait_sdma_drained);
        priv->r_adefered = 0;
        clear_ahg(qp);
}
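
/*
 * qp_priv_alloc(), qp_priv_free(), free_all_qps(), flush_qp_waiters(),
 * stop_send_queue(), quiesce_qp() and notify_qp_reset() above are
 * registered with rdmavt as driver-provided callbacks and are invoked
 * by the core QP state machine rather than called directly from hfi1.
 */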

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_event ev;

        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->remote_ah_attr = qp->alt_ah_attr;
        qp->port_num = qp->alt_ah_attr.port_num;
        qp->s_pkey_index = qp->s_alt_pkey_index;
        qp->s_flags |= RVT_S_AHG_CLEAR;
        priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
        priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);

        ev.device = qp->ibqp.device;
        ev.element.qp = &qp->ibqp;
        ev.event = IB_EVENT_PATH_MIG;
        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
        return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
        u32 mtu;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        struct hfi1_ibport *ibp;
        u8 sc, vl;

        ibp = &dd->pport[qp->port_num - 1].ibport_data;
        sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
        vl = sc_to_vlt(dd, sc);

        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
        if (vl < PER_VL_SEND_CONTEXTS)
                mtu = min_t(u32, mtu, dd->vld[vl].mtu);
        return mtu;
}

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                       struct ib_qp_attr *attr)
{
        int mtu, pidx = qp->port_num - 1;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
        if (mtu == -1)
                return -1; /* values less than 0 are error */

        if (mtu > dd->pport[pidx].ibmtu)
                return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
        else
                return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        struct hfi1_qp_priv *priv = qp->priv;

        write_seqlock(&dev->iowait_lock);
        if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
                qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
                list_del_init(&priv->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock(&dev->iowait_lock);

        if (!(qp->s_flags & RVT_S_BUSY)) {
                qp->s_hdrwords = 0;
                if (qp->s_rdma_mr) {
                        rvt_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
                flush_tx_list(qp);
        }
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
        struct rvt_qp *qp = NULL;
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
        int n;
        int lastwqe;
        struct ib_event ev;

        rcu_read_lock();

        /* Deal only with RC/UC qps that use the given SL. */
        for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
                for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
                        qp = rcu_dereference(qp->next)) {
                        if (qp->port_num == ppd->port &&
                            (qp->ibqp.qp_type == IB_QPT_UC ||
                             qp->ibqp.qp_type == IB_QPT_RC) &&
                            qp->remote_ah_attr.sl == sl &&
                            (ib_rvt_state_ops[qp->state] &
                             RVT_POST_SEND_OK)) {
                                spin_lock_irq(&qp->r_lock);
                                spin_lock(&qp->s_hlock);
                                spin_lock(&qp->s_lock);
                                lastwqe = rvt_error_qp(qp,
                                                       IB_WC_WR_FLUSH_ERR);
                                spin_unlock(&qp->s_lock);
                                spin_unlock(&qp->s_hlock);
                                spin_unlock_irq(&qp->r_lock);
                                if (lastwqe) {
                                        ev.device = qp->ibqp.device;
                                        ev.element.qp = &qp->ibqp;
                                        ev.event =
                                                IB_EVENT_QP_LAST_WQE_REACHED;
                                        qp->ibqp.event_handler(&ev,
                                                qp->ibqp.qp_context);
                                }
                        }
                }
        }

        rcu_read_unlock();
}