1 /* 2 * Copyright(c) 2015, 2016 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include <linux/err.h> 49 #include <linux/vmalloc.h> 50 #include <linux/hash.h> 51 #include <linux/module.h> 52 #include <linux/seq_file.h> 53 #include <rdma/rdma_vt.h> 54 #include <rdma/rdmavt_qp.h> 55 #include <rdma/ib_verbs.h> 56 57 #include "hfi.h" 58 #include "qp.h" 59 #include "trace.h" 60 #include "verbs_txreq.h" 61 62 unsigned int hfi1_qp_table_size = 256; 63 module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO); 64 MODULE_PARM_DESC(qp_table_size, "QP table size"); 65 66 static void flush_tx_list(struct rvt_qp *qp); 67 static int iowait_sleep( 68 struct sdma_engine *sde, 69 struct iowait *wait, 70 struct sdma_txreq *stx, 71 unsigned seq); 72 static void iowait_wakeup(struct iowait *wait, int reason); 73 static void iowait_sdma_drained(struct iowait *wait); 74 static void qp_pio_drain(struct rvt_qp *qp); 75 76 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, 77 struct rvt_qpn_map *map, unsigned off) 78 { 79 return (map - qpt->map) * RVT_BITS_PER_PAGE + off; 80 } 81 82 const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = { 83 [IB_WR_RDMA_WRITE] = { 84 .length = sizeof(struct ib_rdma_wr), 85 .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 86 }, 87 88 [IB_WR_RDMA_READ] = { 89 .length = sizeof(struct ib_rdma_wr), 90 .qpt_support = BIT(IB_QPT_RC), 91 .flags = RVT_OPERATION_ATOMIC, 92 }, 93 94 [IB_WR_ATOMIC_CMP_AND_SWP] = { 95 .length = sizeof(struct ib_atomic_wr), 96 .qpt_support = BIT(IB_QPT_RC), 97 .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 98 }, 99 100 [IB_WR_ATOMIC_FETCH_AND_ADD] = { 101 .length = sizeof(struct ib_atomic_wr), 102 .qpt_support = BIT(IB_QPT_RC), 103 .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE, 104 }, 105 106 [IB_WR_RDMA_WRITE_WITH_IMM] = { 107 .length = sizeof(struct ib_rdma_wr), 108 .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 109 }, 110 111 [IB_WR_SEND] = { 112 .length = sizeof(struct ib_send_wr), 113 .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 114 BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 115 }, 116 117 [IB_WR_SEND_WITH_IMM] = { 118 .length = sizeof(struct ib_send_wr), 119 .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) | 120 BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 121 }, 122 123 [IB_WR_REG_MR] = { 124 .length = sizeof(struct ib_reg_wr), 125 .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 126 .flags = RVT_OPERATION_LOCAL, 127 }, 128 129 [IB_WR_LOCAL_INV] = { 130 .length = sizeof(struct ib_send_wr), 131 .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC), 132 .flags = RVT_OPERATION_LOCAL, 133 }, 134 135 [IB_WR_SEND_WITH_INV] = { 136 .length = sizeof(struct ib_send_wr), 137 .qpt_support = BIT(IB_QPT_RC), 138 }, 139 140 }; 141 142 static void flush_tx_list(struct rvt_qp *qp) 143 { 144 struct hfi1_qp_priv *priv = qp->priv; 145 146 while (!list_empty(&priv->s_iowait.tx_head)) { 147 struct sdma_txreq *tx; 148 149 tx = list_first_entry( 150 &priv->s_iowait.tx_head, 151 struct sdma_txreq, 152 list); 153 list_del_init(&tx->list); 154 hfi1_put_txreq( 155 container_of(tx, struct verbs_txreq, txreq)); 156 } 157 } 158 159 static void flush_iowait(struct rvt_qp *qp) 160 { 161 struct hfi1_qp_priv *priv = qp->priv; 162 unsigned long flags; 163 seqlock_t *lock = priv->s_iowait.lock; 164 165 if (!lock) 166 return; 167 write_seqlock_irqsave(lock, flags); 168 if (!list_empty(&priv->s_iowait.list)) { 169 list_del_init(&priv->s_iowait.list); 170 priv->s_iowait.lock = NULL; 171 rvt_put_qp(qp); 172 } 173 write_sequnlock_irqrestore(lock, flags); 174 } 175 176 static inline int opa_mtu_enum_to_int(int mtu) 177 { 178 switch (mtu) { 179 case OPA_MTU_8192: return 8192; 180 case OPA_MTU_10240: return 10240; 181 default: return -1; 182 } 183 } 184 185 /** 186 * This function is what we would push to the core layer if we wanted to be a 187 * "first class citizen". Instead we hide this here and rely on Verbs ULPs 188 * to blindly pass the MTU enum value from the PathRecord to us. 189 */ 190 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) 191 { 192 int val; 193 194 /* Constraining 10KB packets to 8KB packets */ 195 if (mtu == (enum ib_mtu)OPA_MTU_10240) 196 mtu = OPA_MTU_8192; 197 val = opa_mtu_enum_to_int((int)mtu); 198 if (val > 0) 199 return val; 200 return ib_mtu_enum_to_int(mtu); 201 } 202 203 int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, 204 int attr_mask, struct ib_udata *udata) 205 { 206 struct ib_qp *ibqp = &qp->ibqp; 207 struct hfi1_ibdev *dev = to_idev(ibqp->device); 208 struct hfi1_devdata *dd = dd_from_dev(dev); 209 u8 sc; 210 211 if (attr_mask & IB_QP_AV) { 212 sc = ah_to_sc(ibqp->device, &attr->ah_attr); 213 if (sc == 0xf) 214 return -EINVAL; 215 216 if (!qp_to_sdma_engine(qp, sc) && 217 dd->flags & HFI1_HAS_SEND_DMA) 218 return -EINVAL; 219 220 if (!qp_to_send_context(qp, sc)) 221 return -EINVAL; 222 } 223 224 if (attr_mask & IB_QP_ALT_PATH) { 225 sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr); 226 if (sc == 0xf) 227 return -EINVAL; 228 229 if (!qp_to_sdma_engine(qp, sc) && 230 dd->flags & HFI1_HAS_SEND_DMA) 231 return -EINVAL; 232 233 if (!qp_to_send_context(qp, sc)) 234 return -EINVAL; 235 } 236 237 return 0; 238 } 239 240 void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, 241 int attr_mask, struct ib_udata *udata) 242 { 243 struct ib_qp *ibqp = &qp->ibqp; 244 struct hfi1_qp_priv *priv = qp->priv; 245 246 if (attr_mask & IB_QP_AV) { 247 priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); 248 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); 249 priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); 250 } 251 252 if (attr_mask & IB_QP_PATH_MIG_STATE && 253 attr->path_mig_state == IB_MIG_MIGRATED && 254 qp->s_mig_state == IB_MIG_ARMED) { 255 qp->s_flags |= RVT_S_AHG_CLEAR; 256 priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); 257 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); 258 priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); 259 } 260 } 261 262 /** 263 * hfi1_check_send_wqe - validate wqe 264 * @qp - The qp 265 * @wqe - The built wqe 266 * 267 * validate wqe. This is called 268 * prior to inserting the wqe into 269 * the ring but after the wqe has been 270 * setup. 271 * 272 * Returns 0 on success, -EINVAL on failure 273 * 274 */ 275 int hfi1_check_send_wqe(struct rvt_qp *qp, 276 struct rvt_swqe *wqe) 277 { 278 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 279 struct rvt_ah *ah; 280 281 switch (qp->ibqp.qp_type) { 282 case IB_QPT_RC: 283 case IB_QPT_UC: 284 if (wqe->length > 0x80000000U) 285 return -EINVAL; 286 break; 287 case IB_QPT_SMI: 288 ah = ibah_to_rvtah(wqe->ud_wr.ah); 289 if (wqe->length > (1 << ah->log_pmtu)) 290 return -EINVAL; 291 break; 292 case IB_QPT_GSI: 293 case IB_QPT_UD: 294 ah = ibah_to_rvtah(wqe->ud_wr.ah); 295 if (wqe->length > (1 << ah->log_pmtu)) 296 return -EINVAL; 297 if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf) 298 return -EINVAL; 299 default: 300 break; 301 } 302 return wqe->length <= piothreshold; 303 } 304 305 /** 306 * _hfi1_schedule_send - schedule progress 307 * @qp: the QP 308 * 309 * This schedules qp progress w/o regard to the s_flags. 310 * 311 * It is only used in the post send, which doesn't hold 312 * the s_lock. 313 */ 314 void _hfi1_schedule_send(struct rvt_qp *qp) 315 { 316 struct hfi1_qp_priv *priv = qp->priv; 317 struct hfi1_ibport *ibp = 318 to_iport(qp->ibqp.device, qp->port_num); 319 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 320 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 321 322 iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, 323 priv->s_sde ? 324 priv->s_sde->cpu : 325 cpumask_first(cpumask_of_node(dd->node))); 326 } 327 328 static void qp_pio_drain(struct rvt_qp *qp) 329 { 330 struct hfi1_ibdev *dev; 331 struct hfi1_qp_priv *priv = qp->priv; 332 333 if (!priv->s_sendcontext) 334 return; 335 dev = to_idev(qp->ibqp.device); 336 while (iowait_pio_pending(&priv->s_iowait)) { 337 write_seqlock_irq(&dev->iowait_lock); 338 hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); 339 write_sequnlock_irq(&dev->iowait_lock); 340 iowait_pio_drain(&priv->s_iowait); 341 write_seqlock_irq(&dev->iowait_lock); 342 hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); 343 write_sequnlock_irq(&dev->iowait_lock); 344 } 345 } 346 347 /** 348 * hfi1_schedule_send - schedule progress 349 * @qp: the QP 350 * 351 * This schedules qp progress and caller should hold 352 * the s_lock. 353 */ 354 void hfi1_schedule_send(struct rvt_qp *qp) 355 { 356 lockdep_assert_held(&qp->s_lock); 357 if (hfi1_send_ok(qp)) 358 _hfi1_schedule_send(qp); 359 } 360 361 void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) 362 { 363 unsigned long flags; 364 365 spin_lock_irqsave(&qp->s_lock, flags); 366 if (qp->s_flags & flag) { 367 qp->s_flags &= ~flag; 368 trace_hfi1_qpwakeup(qp, flag); 369 hfi1_schedule_send(qp); 370 } 371 spin_unlock_irqrestore(&qp->s_lock, flags); 372 /* Notify hfi1_destroy_qp() if it is waiting. */ 373 rvt_put_qp(qp); 374 } 375 376 static int iowait_sleep( 377 struct sdma_engine *sde, 378 struct iowait *wait, 379 struct sdma_txreq *stx, 380 unsigned seq) 381 { 382 struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq); 383 struct rvt_qp *qp; 384 struct hfi1_qp_priv *priv; 385 unsigned long flags; 386 int ret = 0; 387 struct hfi1_ibdev *dev; 388 389 qp = tx->qp; 390 priv = qp->priv; 391 392 spin_lock_irqsave(&qp->s_lock, flags); 393 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 394 /* 395 * If we couldn't queue the DMA request, save the info 396 * and try again later rather than destroying the 397 * buffer and undoing the side effects of the copy. 398 */ 399 /* Make a common routine? */ 400 dev = &sde->dd->verbs_dev; 401 list_add_tail(&stx->list, &wait->tx_head); 402 write_seqlock(&dev->iowait_lock); 403 if (sdma_progress(sde, seq, stx)) 404 goto eagain; 405 if (list_empty(&priv->s_iowait.list)) { 406 struct hfi1_ibport *ibp = 407 to_iport(qp->ibqp.device, qp->port_num); 408 409 ibp->rvp.n_dmawait++; 410 qp->s_flags |= RVT_S_WAIT_DMA_DESC; 411 list_add_tail(&priv->s_iowait.list, &sde->dmawait); 412 priv->s_iowait.lock = &dev->iowait_lock; 413 trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); 414 rvt_get_qp(qp); 415 } 416 write_sequnlock(&dev->iowait_lock); 417 qp->s_flags &= ~RVT_S_BUSY; 418 spin_unlock_irqrestore(&qp->s_lock, flags); 419 ret = -EBUSY; 420 } else { 421 spin_unlock_irqrestore(&qp->s_lock, flags); 422 hfi1_put_txreq(tx); 423 } 424 return ret; 425 eagain: 426 write_sequnlock(&dev->iowait_lock); 427 spin_unlock_irqrestore(&qp->s_lock, flags); 428 list_del_init(&stx->list); 429 return -EAGAIN; 430 } 431 432 static void iowait_wakeup(struct iowait *wait, int reason) 433 { 434 struct rvt_qp *qp = iowait_to_qp(wait); 435 436 WARN_ON(reason != SDMA_AVAIL_REASON); 437 hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC); 438 } 439 440 static void iowait_sdma_drained(struct iowait *wait) 441 { 442 struct rvt_qp *qp = iowait_to_qp(wait); 443 unsigned long flags; 444 445 /* 446 * This happens when the send engine notes 447 * a QP in the error state and cannot 448 * do the flush work until that QP's 449 * sdma work has finished. 450 */ 451 spin_lock_irqsave(&qp->s_lock, flags); 452 if (qp->s_flags & RVT_S_WAIT_DMA) { 453 qp->s_flags &= ~RVT_S_WAIT_DMA; 454 hfi1_schedule_send(qp); 455 } 456 spin_unlock_irqrestore(&qp->s_lock, flags); 457 } 458 459 /** 460 * 461 * qp_to_sdma_engine - map a qp to a send engine 462 * @qp: the QP 463 * @sc5: the 5 bit sc 464 * 465 * Return: 466 * A send engine for the qp or NULL for SMI type qp. 467 */ 468 struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) 469 { 470 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 471 struct sdma_engine *sde; 472 473 if (!(dd->flags & HFI1_HAS_SEND_DMA)) 474 return NULL; 475 switch (qp->ibqp.qp_type) { 476 case IB_QPT_SMI: 477 return NULL; 478 default: 479 break; 480 } 481 sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5); 482 return sde; 483 } 484 485 /* 486 * qp_to_send_context - map a qp to a send context 487 * @qp: the QP 488 * @sc5: the 5 bit sc 489 * 490 * Return: 491 * A send context for the qp 492 */ 493 struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5) 494 { 495 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 496 497 switch (qp->ibqp.qp_type) { 498 case IB_QPT_SMI: 499 /* SMA packets to VL15 */ 500 return dd->vld[15].sc; 501 default: 502 break; 503 } 504 505 return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, 506 sc5); 507 } 508 509 struct qp_iter { 510 struct hfi1_ibdev *dev; 511 struct rvt_qp *qp; 512 int specials; 513 int n; 514 }; 515 516 struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) 517 { 518 struct qp_iter *iter; 519 520 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 521 if (!iter) 522 return NULL; 523 524 iter->dev = dev; 525 iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; 526 527 return iter; 528 } 529 530 int qp_iter_next(struct qp_iter *iter) 531 { 532 struct hfi1_ibdev *dev = iter->dev; 533 int n = iter->n; 534 int ret = 1; 535 struct rvt_qp *pqp = iter->qp; 536 struct rvt_qp *qp; 537 538 /* 539 * The approach is to consider the special qps 540 * as an additional table entries before the 541 * real hash table. Since the qp code sets 542 * the qp->next hash link to NULL, this works just fine. 543 * 544 * iter->specials is 2 * # ports 545 * 546 * n = 0..iter->specials is the special qp indices 547 * 548 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are 549 * the potential hash bucket entries 550 * 551 */ 552 for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) { 553 if (pqp) { 554 qp = rcu_dereference(pqp->next); 555 } else { 556 if (n < iter->specials) { 557 struct hfi1_pportdata *ppd; 558 struct hfi1_ibport *ibp; 559 int pidx; 560 561 pidx = n % dev->rdi.ibdev.phys_port_cnt; 562 ppd = &dd_from_dev(dev)->pport[pidx]; 563 ibp = &ppd->ibport_data; 564 565 if (!(n & 1)) 566 qp = rcu_dereference(ibp->rvp.qp[0]); 567 else 568 qp = rcu_dereference(ibp->rvp.qp[1]); 569 } else { 570 qp = rcu_dereference( 571 dev->rdi.qp_dev->qp_table[ 572 (n - iter->specials)]); 573 } 574 } 575 pqp = qp; 576 if (qp) { 577 iter->qp = qp; 578 iter->n = n; 579 return 0; 580 } 581 } 582 return ret; 583 } 584 585 static const char * const qp_type_str[] = { 586 "SMI", "GSI", "RC", "UC", "UD", 587 }; 588 589 static int qp_idle(struct rvt_qp *qp) 590 { 591 return 592 qp->s_last == qp->s_acked && 593 qp->s_acked == qp->s_cur && 594 qp->s_cur == qp->s_tail && 595 qp->s_tail == qp->s_head; 596 } 597 598 void qp_iter_print(struct seq_file *s, struct qp_iter *iter) 599 { 600 struct rvt_swqe *wqe; 601 struct rvt_qp *qp = iter->qp; 602 struct hfi1_qp_priv *priv = qp->priv; 603 struct sdma_engine *sde; 604 struct send_context *send_context; 605 606 sde = qp_to_sdma_engine(qp, priv->s_sc); 607 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 608 send_context = qp_to_send_context(qp, priv->s_sc); 609 seq_printf(s, 610 "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n", 611 iter->n, 612 qp_idle(qp) ? "I" : "B", 613 qp->ibqp.qp_num, 614 atomic_read(&qp->refcount), 615 qp_type_str[qp->ibqp.qp_type], 616 qp->state, 617 wqe ? wqe->wr.opcode : 0, 618 qp->s_hdrwords, 619 qp->s_flags, 620 iowait_sdma_pending(&priv->s_iowait), 621 iowait_pio_pending(&priv->s_iowait), 622 !list_empty(&priv->s_iowait.list), 623 qp->timeout, 624 wqe ? wqe->ssn : 0, 625 qp->s_lsn, 626 qp->s_last_psn, 627 qp->s_psn, qp->s_next_psn, 628 qp->s_sending_psn, qp->s_sending_hpsn, 629 qp->r_psn, 630 qp->s_last, qp->s_acked, qp->s_cur, 631 qp->s_tail, qp->s_head, qp->s_size, 632 qp->s_avail, 633 qp->remote_qpn, 634 rdma_ah_get_dlid(&qp->remote_ah_attr), 635 rdma_ah_get_sl(&qp->remote_ah_attr), 636 qp->pmtu, 637 qp->s_retry, 638 qp->s_retry_cnt, 639 qp->s_rnr_retry_cnt, 640 qp->s_rnr_retry, 641 sde, 642 sde ? sde->this_idx : 0, 643 send_context, 644 send_context ? send_context->sw_index : 0, 645 ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, 646 ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, 647 qp->pid); 648 } 649 650 void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) 651 { 652 struct hfi1_qp_priv *priv; 653 654 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node); 655 if (!priv) 656 return ERR_PTR(-ENOMEM); 657 658 priv->owner = qp; 659 660 priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL, 661 rdi->dparms.node); 662 if (!priv->s_ahg) { 663 kfree(priv); 664 return ERR_PTR(-ENOMEM); 665 } 666 iowait_init( 667 &priv->s_iowait, 668 1, 669 _hfi1_do_send, 670 iowait_sleep, 671 iowait_wakeup, 672 iowait_sdma_drained); 673 return priv; 674 } 675 676 void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp) 677 { 678 struct hfi1_qp_priv *priv = qp->priv; 679 680 kfree(priv->s_ahg); 681 kfree(priv); 682 } 683 684 unsigned free_all_qps(struct rvt_dev_info *rdi) 685 { 686 struct hfi1_ibdev *verbs_dev = container_of(rdi, 687 struct hfi1_ibdev, 688 rdi); 689 struct hfi1_devdata *dd = container_of(verbs_dev, 690 struct hfi1_devdata, 691 verbs_dev); 692 int n; 693 unsigned qp_inuse = 0; 694 695 for (n = 0; n < dd->num_pports; n++) { 696 struct hfi1_ibport *ibp = &dd->pport[n].ibport_data; 697 698 rcu_read_lock(); 699 if (rcu_dereference(ibp->rvp.qp[0])) 700 qp_inuse++; 701 if (rcu_dereference(ibp->rvp.qp[1])) 702 qp_inuse++; 703 rcu_read_unlock(); 704 } 705 706 return qp_inuse; 707 } 708 709 void flush_qp_waiters(struct rvt_qp *qp) 710 { 711 lockdep_assert_held(&qp->s_lock); 712 flush_iowait(qp); 713 } 714 715 void stop_send_queue(struct rvt_qp *qp) 716 { 717 struct hfi1_qp_priv *priv = qp->priv; 718 719 cancel_work_sync(&priv->s_iowait.iowork); 720 } 721 722 void quiesce_qp(struct rvt_qp *qp) 723 { 724 struct hfi1_qp_priv *priv = qp->priv; 725 726 iowait_sdma_drain(&priv->s_iowait); 727 qp_pio_drain(qp); 728 flush_tx_list(qp); 729 } 730 731 void notify_qp_reset(struct rvt_qp *qp) 732 { 733 qp->r_adefered = 0; 734 clear_ahg(qp); 735 } 736 737 /* 738 * Switch to alternate path. 739 * The QP s_lock should be held and interrupts disabled. 740 */ 741 void hfi1_migrate_qp(struct rvt_qp *qp) 742 { 743 struct hfi1_qp_priv *priv = qp->priv; 744 struct ib_event ev; 745 746 qp->s_mig_state = IB_MIG_MIGRATED; 747 qp->remote_ah_attr = qp->alt_ah_attr; 748 qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); 749 qp->s_pkey_index = qp->s_alt_pkey_index; 750 qp->s_flags |= RVT_S_AHG_CLEAR; 751 priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); 752 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); 753 754 ev.device = qp->ibqp.device; 755 ev.element.qp = &qp->ibqp; 756 ev.event = IB_EVENT_PATH_MIG; 757 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); 758 } 759 760 int mtu_to_path_mtu(u32 mtu) 761 { 762 return mtu_to_enum(mtu, OPA_MTU_8192); 763 } 764 765 u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) 766 { 767 u32 mtu; 768 struct hfi1_ibdev *verbs_dev = container_of(rdi, 769 struct hfi1_ibdev, 770 rdi); 771 struct hfi1_devdata *dd = container_of(verbs_dev, 772 struct hfi1_devdata, 773 verbs_dev); 774 struct hfi1_ibport *ibp; 775 u8 sc, vl; 776 777 ibp = &dd->pport[qp->port_num - 1].ibport_data; 778 sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; 779 vl = sc_to_vlt(dd, sc); 780 781 mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu); 782 if (vl < PER_VL_SEND_CONTEXTS) 783 mtu = min_t(u32, mtu, dd->vld[vl].mtu); 784 return mtu; 785 } 786 787 int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, 788 struct ib_qp_attr *attr) 789 { 790 int mtu, pidx = qp->port_num - 1; 791 struct hfi1_ibdev *verbs_dev = container_of(rdi, 792 struct hfi1_ibdev, 793 rdi); 794 struct hfi1_devdata *dd = container_of(verbs_dev, 795 struct hfi1_devdata, 796 verbs_dev); 797 mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu); 798 if (mtu == -1) 799 return -1; /* values less than 0 are error */ 800 801 if (mtu > dd->pport[pidx].ibmtu) 802 return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048); 803 else 804 return attr->path_mtu; 805 } 806 807 void notify_error_qp(struct rvt_qp *qp) 808 { 809 struct hfi1_qp_priv *priv = qp->priv; 810 seqlock_t *lock = priv->s_iowait.lock; 811 812 if (lock) { 813 write_seqlock(lock); 814 if (!list_empty(&priv->s_iowait.list) && 815 !(qp->s_flags & RVT_S_BUSY)) { 816 qp->s_flags &= ~RVT_S_ANY_WAIT_IO; 817 list_del_init(&priv->s_iowait.list); 818 priv->s_iowait.lock = NULL; 819 rvt_put_qp(qp); 820 } 821 write_sequnlock(lock); 822 } 823 824 if (!(qp->s_flags & RVT_S_BUSY)) { 825 qp->s_hdrwords = 0; 826 if (qp->s_rdma_mr) { 827 rvt_put_mr(qp->s_rdma_mr); 828 qp->s_rdma_mr = NULL; 829 } 830 flush_tx_list(qp); 831 } 832 } 833 834 /** 835 * hfi1_error_port_qps - put a port's RC/UC qps into error state 836 * @ibp: the ibport. 837 * @sl: the service level. 838 * 839 * This function places all RC/UC qps with a given service level into error 840 * state. It is generally called to force upper lay apps to abandon stale qps 841 * after an sl->sc mapping change. 842 */ 843 void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl) 844 { 845 struct rvt_qp *qp = NULL; 846 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 847 struct hfi1_ibdev *dev = &ppd->dd->verbs_dev; 848 int n; 849 int lastwqe; 850 struct ib_event ev; 851 852 rcu_read_lock(); 853 854 /* Deal only with RC/UC qps that use the given SL. */ 855 for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) { 856 for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; 857 qp = rcu_dereference(qp->next)) { 858 if (qp->port_num == ppd->port && 859 (qp->ibqp.qp_type == IB_QPT_UC || 860 qp->ibqp.qp_type == IB_QPT_RC) && 861 rdma_ah_get_sl(&qp->remote_ah_attr) == sl && 862 (ib_rvt_state_ops[qp->state] & 863 RVT_POST_SEND_OK)) { 864 spin_lock_irq(&qp->r_lock); 865 spin_lock(&qp->s_hlock); 866 spin_lock(&qp->s_lock); 867 lastwqe = rvt_error_qp(qp, 868 IB_WC_WR_FLUSH_ERR); 869 spin_unlock(&qp->s_lock); 870 spin_unlock(&qp->s_hlock); 871 spin_unlock_irq(&qp->r_lock); 872 if (lastwqe) { 873 ev.device = qp->ibqp.device; 874 ev.element.qp = &qp->ibqp; 875 ev.event = 876 IB_EVENT_QP_LAST_WQE_REACHED; 877 qp->ibqp.event_handler(&ev, 878 qp->ibqp.qp_context); 879 } 880 } 881 } 882 } 883 884 rcu_read_unlock(); 885 } 886