/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>

#include "qib.h"

#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)

static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
			      struct qpn_map *map, unsigned off)
{
	return (map - qpt->map) * BITS_PER_PAGE + off;
}

static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
					struct qpn_map *map, unsigned off,
					unsigned n)
{
	if (qpt->mask) {
		off++;
		if (((off & qpt->mask) >> 1) >= n)
			off = (off | qpt->mask) + 2;
	} else
		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
	return off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static u32 credit_table[31] = {
	0,			/* 0 */
	1,			/* 1 */
	2,			/* 2 */
	3,			/* 3 */
	4,			/* 4 */
	6,			/* 5 */
	8,			/* 6 */
	12,			/* 7 */
	16,			/* 8 */
	24,			/* 9 */
	32,			/* A */
	48,			/* B */
	64,			/* C */
	96,			/* D */
	128,			/* E */
	192,			/* F */
	256,			/* 10 */
	384,			/* 11 */
	512,			/* 12 */
	768,			/* 13 */
	1024,			/* 14 */
	1536,			/* 15 */
	2048,			/* 16 */
	3072,			/* 17 */
	4096,			/* 18 */
	6144,			/* 19 */
	8192,			/* 1A */
	12288,			/* 1B */
	16384,			/* 1C */
	24576,			/* 1D */
	32768			/* 1E */
};

static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * Free the page if someone raced with us installing it.
	 */

	spin_lock(&qpt->lock);
	if (map->page)
		free_page(page);
	else
		map->page = (void *)page;
	spin_unlock(&qpt->lock);
}
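
/*
 * Illustration of the QPN <-> bitmap mapping used above (comment
 * only, not compiled): a QPN lives in qpt->map[qpn / BITS_PER_PAGE]
 * at bit (qpn & BITS_PER_PAGE_MASK), which is exactly what mk_qpn()
 * inverts.  With 4 KB pages, BITS_PER_PAGE is 32768, so e.g. QPN
 * 40000 would be bit 7232 of qpt->map[1].  When qpt->mask is
 * non-zero, ((qpn & qpt->mask) >> 1) selects the kernel receive
 * context that services the QP, so find_next_offset() skips offsets
 * whose context index would be >= dd->n_krcv_queues.
 */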

/*
 * Allocate the next available QPN or
 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
 */
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct qpn_map *map;
	int ret;

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		ret = type == IB_QPT_GSI;
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	qpn = qpt->last + 2;
	if (qpn >= QPN_MAX)
		qpn = 2;
	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
		qpn = (qpn | qpt->mask) + 2;
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset = find_next_offset(qpt, map, offset,
						  dd->n_krcv_queues);
			qpn = mk_qpn(qpt, map, offset);
			/*
			 * This test differs from alloc_pidmap().
			 * If find_next_offset() does find a zero
			 * bit, we don't need to check for QPN
			 * wrapping around past our starting QPN.
			 * We just need to be sure we don't loop
			 * forever.
			 */
		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			offset = 0;
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			offset = 0;
		} else {
			map = &qpt->map[0];
			offset = 2;
		}
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}
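
/*
 * Layout of qpt->flags as consumed by alloc_qpn() above (comment
 * only): each port owns two bits, bit (2 * (port - 1)) marking QP0
 * (SMI) as allocated and bit (2 * (port - 1) + 1) marking QP1 (GSI).
 * For example, allocating the GSI QP on port 2 sets
 * n = 1 << (1 + 2 * (2 - 1)) = 0x8.
 */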

static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
{
	struct qpn_map *map;

	map = qpt->map + qpn / BITS_PER_PAGE;
	if (map->page)
		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}

/*
 * Put the QP into the hash table.
 * The hash table holds a reference to the QP.
 */
static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
	unsigned long flags;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qp->ibqp.qp_num == 0)
		ibp->qp0 = qp;
	else if (qp->ibqp.qp_num == 1)
		ibp->qp1 = qp;
	else {
		qp->next = dev->qp_table[n];
		dev->qp_table[n] = qp;
	}
	atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}

/*
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.
 */
static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_qp *q, **qpp;
	unsigned long flags;

	qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (ibp->qp0 == qp) {
		ibp->qp0 = NULL;
		atomic_dec(&qp->refcount);
	} else if (ibp->qp1 == qp) {
		ibp->qp1 = NULL;
		atomic_dec(&qp->refcount);
	} else
		for (; (q = *qpp) != NULL; qpp = &q->next)
			if (q == qp) {
				*qpp = qp->next;
				qp->next = NULL;
				atomic_dec(&qp->refcount);
				break;
			}

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}

/**
 * qib_free_all_qps - check for QPs still in use
 * @dd: the device data structure
 *
 * There should not be any QPs still in use.
 * Returns the number of QPs still allocated.
 */
unsigned qib_free_all_qps(struct qib_devdata *dd)
{
	struct qib_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;
	unsigned n, qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct qib_ibport *ibp = &dd->pport[n].ibport_data;

		if (!qib_mcast_tree_empty(ibp))
			qp_inuse++;
		if (ibp->qp0)
			qp_inuse++;
		if (ibp->qp1)
			qp_inuse++;
	}

	spin_lock_irqsave(&dev->qpt_lock, flags);
	for (n = 0; n < dev->qp_table_size; n++) {
		qp = dev->qp_table[n];
		dev->qp_table[n] = NULL;

		for (; qp; qp = qp->next)
			qp_inuse++;
	}
	spin_unlock_irqrestore(&dev->qpt_lock, flags);

	return qp_inuse;
}

/**
 * qib_lookup_qpn - return the QP with the given QPN
 * @ibp: the IB port on which to look up the QP
 * @qpn: the QP number to look up
 *
 * The caller is responsible for decrementing the QP reference count
 * when done.
 */
struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
{
	struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qpn == 0)
		qp = ibp->qp0;
	else if (qpn == 1)
		qp = ibp->qp1;
	else
		for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
		     qp = qp->next)
			if (qp->ibqp.qp_num == qpn)
				break;
	if (qp)
		atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
	return qp;
}
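
/*
 * Typical qib_lookup_qpn() caller pattern (illustration only, not
 * compiled here): the reference taken under qpt_lock must be dropped
 * once the packet has been handed off, and the final drop pairs with
 * the wait_event() on qp->wait in the reset/destroy paths below.
 *
 *	qp = qib_lookup_qpn(ibp, qpn);
 *	if (qp) {
 *		... process the packet on qp ...
 *		if (atomic_dec_and_test(&qp->refcount))
 *			wake_up(&qp->wait);
 *	}
 */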

/**
 * qib_reset_qp - initialize the QP state to the reset state
 * @qp: the QP to reset
 * @type: the QP type
 */
static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
{
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;
	atomic_set(&qp->s_dma_busy, 0);
	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;
	if (type == IB_QPT_RC) {
		qp->s_state = IB_OPCODE_RC_SEND_LAST;
		qp->r_state = IB_OPCODE_RC_SEND_LAST;
	} else {
		qp->s_state = IB_OPCODE_UC_SEND_LAST;
		qp->r_state = IB_OPCODE_UC_SEND_LAST;
	}
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	qp->s_mig_state = IB_MIG_MIGRATED;
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_sge.num_sge = 0;
}

static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
{
	unsigned n;

	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
		while (qp->s_rdma_read_sge.num_sge) {
			atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
			if (--qp->s_rdma_read_sge.num_sge)
				qp->s_rdma_read_sge.sge =
					*qp->s_rdma_read_sge.sg_list++;
		}

	while (qp->r_sge.num_sge) {
		atomic_dec(&qp->r_sge.sge.mr->refcount);
		if (--qp->r_sge.num_sge)
			qp->r_sge.sge = *qp->r_sge.sg_list++;
	}

	if (clr_sends) {
		while (qp->s_last != qp->s_head) {
			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
			unsigned i;

			for (i = 0; i < wqe->wr.num_sge; i++) {
				struct qib_sge *sge = &wqe->sg_list[i];

				atomic_dec(&sge->mr->refcount);
			}
			if (qp->ibqp.qp_type == IB_QPT_UD ||
			    qp->ibqp.qp_type == IB_QPT_SMI ||
			    qp->ibqp.qp_type == IB_QPT_GSI)
				atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
		}
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
	}

	if (qp->ibqp.qp_type != IB_QPT_RC)
		return;

	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
		struct qib_ack_entry *e = &qp->s_ack_queue[n];

		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
		    e->rdma_sge.mr) {
			atomic_dec(&e->rdma_sge.mr->refcount);
			e->rdma_sge.mr = NULL;
		}
	}
}

/**
 * qib_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if a RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	int ret = 0;

	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
		goto bail;

	qp->state = IB_QPS_ERR;

	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}

	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;

	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
		list_del_init(&qp->iowait);
	}
	spin_unlock(&dev->pending_lock);

	if (!(qp->s_flags & QIB_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
	}

	/* Schedule the sending tasklet to drain the send work queue. */
	if (qp->s_last != qp->s_head)
		qib_schedule_send(qp);

	clear_mr_refs(qp, 0);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct qib_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler)
		ret = 1;

bail:
	return ret;
}
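
/*
 * Note for callers of qib_error_qp() (comment only): a non-zero
 * return means the IB_EVENT_QP_LAST_WQE_REACHED event should be
 * delivered to qp->ibqp.event_handler once the r_lock/s_lock have
 * been dropped; the lastwqe handling in qib_modify_qp() below shows
 * the expected pattern.
 */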

/**
 * qib_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		  int attr_mask, struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibqp->device);
	struct qib_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > QIB_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  OK to set greater
	 * than the active mtu (or even the max_cap, if we have tuned
	 * that to a small mtu).  We'll set qp->path_mtu
	 * to the lesser of requested attribute mtu and active,
	 * for packetizing messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		struct qib_devdata *dd = dd_from_dev(dev);
		int mtu, pidx = qp->port_num - 1;

		mtu = ib_mtu_enum_to_int(attr->path_mtu);
		if (mtu == -1)
			goto inval;
		if (mtu > dd->pport[pidx].ibmtu) {
			switch (dd->pport[pidx].ibmtu) {
			case 4096:
				pmtu = IB_MTU_4096;
				break;
			case 2048:
				pmtu = IB_MTU_2048;
				break;
			case 1024:
				pmtu = IB_MTU_1024;
				break;
			case 512:
				pmtu = IB_MTU_512;
				break;
			case 256:
				pmtu = IB_MTU_256;
				break;
			default:
				pmtu = IB_MTU_2048;
			}
		} else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			spin_lock(&dev->pending_lock);
			if (!list_empty(&qp->iowait))
				list_del_init(&qp->iowait);
			spin_unlock(&dev->pending_lock);
			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_work);
			del_timer_sync(&qp->s_timer);
			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
			if (qp->s_tx) {
				qib_put_txreq(qp->s_tx);
				qp->s_tx = NULL;
			}
			remove_qp(dev, qp);
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			qib_reset_qp(qp, ibqp->qp_type);
		}
		break;

	case IB_QPS_RTR:
		/* Allow event to retrigger if QP set to RTR more than once */
		qp->r_flags &= ~QIB_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU)
		qp->path_mtu = pmtu;

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT)
		qp->timeout = attr->timeout;

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}
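
/*
 * Note on the IB_QP_SQ_PSN block above (comment only): PSNs are
 * 24-bit quantities, so sq_psn is masked with QIB_PSN_MASK, and the
 * "s_next_psn - 1" initialization may wrap below zero in 32 bits;
 * that is harmless because PSN comparisons elsewhere in the driver
 * go through qib_cmp24(), which only considers the low 24 bits.
 */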

int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		 int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct qib_qp *qp = to_iqp(ibqp);

	attr->qp_state = qp->state;
	attr->cur_qp_state = attr->qp_state;
	attr->path_mtu = qp->path_mtu;
	attr->path_mig_state = qp->s_mig_state;
	attr->qkey = qp->qkey;
	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
	attr->dest_qp_num = qp->remote_qpn;
	attr->qp_access_flags = qp->qp_access_flags;
	attr->cap.max_send_wr = qp->s_size - 1;
	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
	attr->cap.max_send_sge = qp->s_max_sge;
	attr->cap.max_recv_sge = qp->r_rq.max_sge;
	attr->cap.max_inline_data = 0;
	attr->ah_attr = qp->remote_ah_attr;
	attr->alt_ah_attr = qp->alt_ah_attr;
	attr->pkey_index = qp->s_pkey_index;
	attr->alt_pkey_index = qp->s_alt_pkey_index;
	attr->en_sqd_async_notify = 0;
	attr->sq_draining = qp->s_draining;
	attr->max_rd_atomic = qp->s_max_rd_atomic;
	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
	attr->min_rnr_timer = qp->r_min_rnr_timer;
	attr->port_num = qp->port_num;
	attr->timeout = qp->timeout;
	attr->retry_cnt = qp->s_retry_cnt;
	attr->rnr_retry = qp->s_rnr_retry_cnt;
	attr->alt_port_num = qp->alt_ah_attr.port_num;
	attr->alt_timeout = qp->alt_timeout;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->cap = attr->cap;
	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
	else
		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->port_num = qp->port_num;
	return 0;
}

/**
 * qib_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 qib_compute_aeth(struct qib_qp *qp)
{
	u32 aeth = qp->r_msn & QIB_MSN_MASK;

	if (qp->ibqp.srq) {
		/*
		 * Shared receive queues don't generate credits.
		 * Set the credit field to the invalid value.
		 */
		aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
	} else {
		u32 min, max, x;
		u32 credits;
		struct qib_rwq *wq = qp->r_rq.wq;
		u32 head;
		u32 tail;

		/* sanity check pointers before trusting them */
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		/*
		 * Compute the number of credits available (RWQEs).
		 * XXX Not holding the r_rq.lock here so there is a small
		 * chance that the pair of reads are not atomic.
		 */
		credits = head - tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
		 * Binary search the credit table to find the code to
		 * use.
		 */
		min = 0;
		max = 31;
		for (;;) {
			x = (min + max) / 2;
			if (credit_table[x] == credits)
				break;
			if (credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;
			else
				min = x;
		}
		aeth |= x << QIB_AETH_CREDIT_SHIFT;
	}
	return cpu_to_be32(aeth);
}
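
/*
 * Worked example for the credit search above (comment only): with 10
 * RWQEs available, the loop settles on x = 6, since credit_table[6]
 * = 8 is the largest entry that does not exceed 10; the receiver
 * advertises 8 credits rather than over-promising.
 */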

/**
 * qib_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Returns the queue pair on success, otherwise returns an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct qib_qp *qp;
	int err;
	struct qib_swqe *swq = NULL;
	struct qib_ibdev *dev;
	struct qib_devdata *dd;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret;

	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		/* fall through: SMI/GSI share the allocation path below */
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct qib_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct qib_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct qib_srq *srq = to_isrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_swq;
		}
		if (init_attr->srq)
			sz = 0;
		else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct qib_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
						   qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qp;
			}
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_waitqueue_head(&qp->wait_dma);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_WORK(&qp->s_work, qib_do_send);
		INIT_LIST_HEAD(&qp->iowait);
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
		dev = to_idev(ibpd->device);
		dd = dd_from_dev(dev);
		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
				init_attr->port_num);
		if (err < 0) {
			ret = ERR_PTR(err);
			vfree(qp->r_rq.wq);
			goto bail_qp;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		qib_reset_qp(qp, init_attr->qp_type);
		break;

	default:
		/* Don't support raw QPs */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		} else {
			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;

			qp->ip = qib_create_mmap_info(dev, s,
						      ibpd->uobject->context,
						      qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_ip;
			}

			err = ib_copy_to_udata(udata, &(qp->ip->offset),
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&dev->n_qps_lock);
	if (dev->n_qps_allocated == ib_qib_max_qps) {
		spin_unlock(&dev->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_qps_allocated++;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &qp->ibqp;
	goto bail;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}

/**
 * qib_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int qib_destroy_qp(struct ib_qp *ibqp)
{
	struct qib_qp *qp = to_iqp(ibqp);
	struct qib_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		spin_lock(&dev->pending_lock);
		if (!list_empty(&qp->iowait))
			list_del_init(&qp->iowait);
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
		spin_unlock_irq(&qp->s_lock);
		cancel_work_sync(&qp->s_work);
		del_timer_sync(&qp->s_timer);
		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
		remove_qp(dev, qp);
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		clear_mr_refs(qp, 1);
	} else
		spin_unlock_irq(&qp->s_lock);

	/* all users cleaned up, mark it available */
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}

/**
 * qib_init_qpn_table - initialize the QP number table for a device
 * @dd: the device data structure
 * @qpt: the QPN table
 */
void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
{
	spin_lock_init(&qpt->lock);
	qpt->last = 1;		/* start with QPN 2 */
	qpt->nmaps = 1;
	qpt->mask = dd->qpn_mask;
}

/**
 * qib_free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
void qib_free_qpn_table(struct qib_qpn_table *qpt)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
		if (qpt->map[i].page)
			free_page((unsigned long) qpt->map[i].page);
}

/**
 * qib_get_credit - handle a credit update from an incoming AETH
 * @qp: the qp whose send state to update
 * @aeth: the Acknowledge Extended Transport Header
 *
 * The QP s_lock should be held.
 */
void qib_get_credit(struct qib_qp *qp, u32 aeth)
{
	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;

	/*
	 * If the credit is invalid, we can send
	 * as many packets as we like.  Otherwise, we have to
	 * honor the credit field.
	 */
	if (credit == QIB_AETH_CREDIT_INVAL) {
		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
		/* Compute new LSN (i.e., MSN + credit) */
		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
		if (qib_cmp24(credit, qp->s_lsn) > 0) {
			qp->s_lsn = credit;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	}
}
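
/*
 * Example of the LSN update in qib_get_credit() above (comment
 * only): if the AETH carries MSN 100 with credit code 6 (8 credits
 * per credit_table at the top of this file), the new limit sequence
 * number becomes (100 + 8) & QIB_MSN_MASK = 108; the sender may keep
 * posting until its SSN passes that limit, at which point it must
 * wait for the next credit update.
 */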