/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>

#include "qib.h"

#define BITS_PER_PAGE		(PAGE_SIZE*BITS_PER_BYTE)
#define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)

static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
			      struct qpn_map *map, unsigned off)
{
	return (map - qpt->map) * BITS_PER_PAGE + off;
}

static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
					struct qpn_map *map, unsigned off,
					unsigned r)
{
	if (qpt->mask) {
		off++;
		if ((off & qpt->mask) >> 1 != r)
			off = ((off & qpt->mask) ?
				(off | qpt->mask) + 1 : off) | (r << 1);
	} else
		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
	return off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static u32 credit_table[31] = {
	0,			/* 0 */
	1,			/* 1 */
	2,			/* 2 */
	3,			/* 3 */
	4,			/* 4 */
	6,			/* 5 */
	8,			/* 6 */
	12,			/* 7 */
	16,			/* 8 */
	24,			/* 9 */
	32,			/* A */
	48,			/* B */
	64,			/* C */
	96,			/* D */
	128,			/* E */
	192,			/* F */
	256,			/* 10 */
	384,			/* 11 */
	512,			/* 12 */
	768,			/* 13 */
	1024,			/* 14 */
	1536,			/* 15 */
	2048,			/* 16 */
	3072,			/* 17 */
	4096,			/* 18 */
	6144,			/* 19 */
	8192,			/* 1A */
	12288,			/* 1B */
	16384,			/* 1C */
	24576,			/* 1D */
	32768			/* 1E */
};

static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	/*
	 * Free the page if someone raced with us installing it.
	 */

	spin_lock(&qpt->lock);
	if (map->page)
		free_page(page);
	else
		map->page = (void *)page;
	spin_unlock(&qpt->lock);
}
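/*
 * QPN bookkeeping (editor's summary of the logic below): QPNs are
 * tracked in an array of bitmap pages, BITS_PER_PAGE QPNs per page,
 * with pages allocated lazily by get_map_page().  When qpt->mask is
 * non-zero, the QPN bits just above bit 0 encode a kernel receive
 * context, so alloc_qpn() and find_next_offset() only hand out QPNs
 * whose masked bits match the chosen context r.  For example, with
 * qpt->mask == 0x6 and r == 2, only QPNs with (qpn & 0x6) >> 1 == 2
 * (i.e. qpn == 4 or 5 modulo 8) are eligible.
 */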
/*
 * Allocate the next available QPN or
 * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
 */
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
		     enum ib_qp_type type, u8 port)
{
	u32 i, offset, max_scan, qpn;
	struct qpn_map *map;
	u32 ret;
	int r;

	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
		unsigned n;

		ret = type == IB_QPT_GSI;
		n = 1 << (ret + 2 * (port - 1));
		spin_lock(&qpt->lock);
		if (qpt->flags & n)
			ret = -EINVAL;
		else
			qpt->flags |= n;
		spin_unlock(&qpt->lock);
		goto bail;
	}

	r = smp_processor_id();
	if (r >= dd->n_krcv_queues)
		r %= dd->n_krcv_queues;
	qpn = qpt->last + 1;
	if (qpn >= QPN_MAX)
		qpn = 2;
	if (qpt->mask && ((qpn & qpt->mask) >> 1) != r)
		qpn = ((qpn & qpt->mask) ? (qpn | qpt->mask) + 1 : qpn) |
			(r << 1);
	offset = qpn & BITS_PER_PAGE_MASK;
	map = &qpt->map[qpn / BITS_PER_PAGE];
	max_scan = qpt->nmaps - !offset;
	for (i = 0;;) {
		if (unlikely(!map->page)) {
			get_map_page(qpt, map);
			if (unlikely(!map->page))
				break;
		}
		do {
			if (!test_and_set_bit(offset, map->page)) {
				qpt->last = qpn;
				ret = qpn;
				goto bail;
			}
			offset = find_next_offset(qpt, map, offset, r);
			qpn = mk_qpn(qpt, map, offset);
			/*
			 * This test differs from alloc_pidmap().
			 * If find_next_offset() does find a zero
			 * bit, we don't need to check for QPN
			 * wrapping around past our starting QPN.
			 * We just need to be sure we don't loop
			 * forever.
			 */
		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
		/*
		 * In order to keep the number of pages allocated to a
		 * minimum, we scan all the existing pages before increasing
		 * the size of the bitmap table.
		 */
		if (++i > max_scan) {
			if (qpt->nmaps == QPNMAP_ENTRIES)
				break;
			map = &qpt->map[qpt->nmaps++];
			offset = qpt->mask ? (r << 1) : 0;
		} else if (map < &qpt->map[qpt->nmaps]) {
			++map;
			offset = qpt->mask ? (r << 1) : 0;
		} else {
			map = &qpt->map[0];
			offset = qpt->mask ? (r << 1) : 2;
		}
		qpn = mk_qpn(qpt, map, offset);
	}

	ret = -ENOMEM;

bail:
	return ret;
}

static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
{
	struct qpn_map *map;

	map = qpt->map + qpn / BITS_PER_PAGE;
	if (map->page)
		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
}

/*
 * Put the QP into the hash table.
 * The hash table holds a reference to the QP.
 */
static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
	unsigned long flags;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qp->ibqp.qp_num == 0)
		ibp->qp0 = qp;
	else if (qp->ibqp.qp_num == 1)
		ibp->qp1 = qp;
	else {
		qp->next = dev->qp_table[n];
		dev->qp_table[n] = qp;
	}
	atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}
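/*
 * Reference counting convention (editor's note, inferred from the code
 * in this file): the per-device hash table (or the per-port qp0/qp1
 * pointer) holds one reference, taken in insert_qp() and dropped in
 * remove_qp(), and qib_lookup_qpn() takes an additional reference for
 * its caller.  Teardown paths wait on qp->wait until the count reaches
 * zero, so whoever drops the last reference is expected to wake that
 * queue.
 */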
/*
 * Remove the QP from the table so it can't be found asynchronously by
 * the receive interrupt routine.
 */
static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_qp *q, **qpp;
	unsigned long flags;

	qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (ibp->qp0 == qp) {
		ibp->qp0 = NULL;
		atomic_dec(&qp->refcount);
	} else if (ibp->qp1 == qp) {
		ibp->qp1 = NULL;
		atomic_dec(&qp->refcount);
	} else
		for (; (q = *qpp) != NULL; qpp = &q->next)
			if (q == qp) {
				*qpp = qp->next;
				qp->next = NULL;
				atomic_dec(&qp->refcount);
				break;
			}

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
}

/**
 * qib_free_all_qps - check for QPs still in use
 * @dd: the device data structure
 *
 * Empties the per-device QP hash table and returns the number of QPs
 * still in use; there should not be any.
 */
unsigned qib_free_all_qps(struct qib_devdata *dd)
{
	struct qib_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;
	unsigned n, qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct qib_ibport *ibp = &dd->pport[n].ibport_data;

		if (!qib_mcast_tree_empty(ibp))
			qp_inuse++;
		if (ibp->qp0)
			qp_inuse++;
		if (ibp->qp1)
			qp_inuse++;
	}

	spin_lock_irqsave(&dev->qpt_lock, flags);
	for (n = 0; n < dev->qp_table_size; n++) {
		qp = dev->qp_table[n];
		dev->qp_table[n] = NULL;

		for (; qp; qp = qp->next)
			qp_inuse++;
	}
	spin_unlock_irqrestore(&dev->qpt_lock, flags);

	return qp_inuse;
}
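/*
 * Illustrative caller pattern for qib_lookup_qpn() (editor's sketch,
 * not code from this file): use the QP, then drop the reference the
 * lookup took, waking any teardown path waiting for the count to
 * reach zero.
 *
 *	qp = qib_lookup_qpn(ibp, qpn);
 *	if (qp) {
 *		... process the packet ...
 *		if (atomic_dec_and_test(&qp->refcount))
 *			wake_up(&qp->wait);
 *	}
 */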
/**
 * qib_lookup_qpn - return the QP with the given QPN
 * @ibp: the IB port (used to find QP0/QP1 and the owning device)
 * @qpn: the QP number to look up
 *
 * The caller is responsible for decrementing the QP reference count
 * when done.
 */
struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
{
	struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
	unsigned long flags;
	struct qib_qp *qp;

	spin_lock_irqsave(&dev->qpt_lock, flags);

	if (qpn == 0)
		qp = ibp->qp0;
	else if (qpn == 1)
		qp = ibp->qp1;
	else
		for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
		     qp = qp->next)
			if (qp->ibqp.qp_num == qpn)
				break;
	if (qp)
		atomic_inc(&qp->refcount);

	spin_unlock_irqrestore(&dev->qpt_lock, flags);
	return qp;
}

/**
 * qib_reset_qp - initialize the QP state to the reset state
 * @qp: the QP to reset
 * @type: the QP type
 */
static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
{
	qp->remote_qpn = 0;
	qp->qkey = 0;
	qp->qp_access_flags = 0;
	atomic_set(&qp->s_dma_busy, 0);
	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
	qp->s_hdrwords = 0;
	qp->s_wqe = NULL;
	qp->s_draining = 0;
	qp->s_next_psn = 0;
	qp->s_last_psn = 0;
	qp->s_sending_psn = 0;
	qp->s_sending_hpsn = 0;
	qp->s_psn = 0;
	qp->r_psn = 0;
	qp->r_msn = 0;
	if (type == IB_QPT_RC) {
		qp->s_state = IB_OPCODE_RC_SEND_LAST;
		qp->r_state = IB_OPCODE_RC_SEND_LAST;
	} else {
		qp->s_state = IB_OPCODE_UC_SEND_LAST;
		qp->r_state = IB_OPCODE_UC_SEND_LAST;
	}
	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
	qp->r_nak_state = 0;
	qp->r_aflags = 0;
	qp->r_flags = 0;
	qp->s_head = 0;
	qp->s_tail = 0;
	qp->s_cur = 0;
	qp->s_acked = 0;
	qp->s_last = 0;
	qp->s_ssn = 1;
	qp->s_lsn = 0;
	qp->s_mig_state = IB_MIG_MIGRATED;
	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
	qp->r_head_ack_queue = 0;
	qp->s_tail_ack_queue = 0;
	qp->s_num_rd_atomic = 0;
	if (qp->r_rq.wq) {
		qp->r_rq.wq->head = 0;
		qp->r_rq.wq->tail = 0;
	}
	qp->r_sge.num_sge = 0;
}

static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
{
	unsigned n;

	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
		while (qp->s_rdma_read_sge.num_sge) {
			atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
			if (--qp->s_rdma_read_sge.num_sge)
				qp->s_rdma_read_sge.sge =
					*qp->s_rdma_read_sge.sg_list++;
		}

	while (qp->r_sge.num_sge) {
		atomic_dec(&qp->r_sge.sge.mr->refcount);
		if (--qp->r_sge.num_sge)
			qp->r_sge.sge = *qp->r_sge.sg_list++;
	}

	if (clr_sends) {
		while (qp->s_last != qp->s_head) {
			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
			unsigned i;

			for (i = 0; i < wqe->wr.num_sge; i++) {
				struct qib_sge *sge = &wqe->sg_list[i];

				atomic_dec(&sge->mr->refcount);
			}
			if (qp->ibqp.qp_type == IB_QPT_UD ||
			    qp->ibqp.qp_type == IB_QPT_SMI ||
			    qp->ibqp.qp_type == IB_QPT_GSI)
				atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
			if (++qp->s_last >= qp->s_size)
				qp->s_last = 0;
		}
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
	}

	if (qp->ibqp.qp_type != IB_QPT_RC)
		return;

	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
		struct qib_ack_entry *e = &qp->s_ack_queue[n];

		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
		    e->rdma_sge.mr) {
			atomic_dec(&e->rdma_sge.mr->refcount);
			e->rdma_sge.mr = NULL;
		}
	}
}
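/*
 * Editor's note on the error transition implemented below: moving a QP
 * to IB_QPS_ERR flushes the receive queue directly (each posted RWQE
 * completes with IB_WC_WR_FLUSH_ERR), while the send queue is drained
 * asynchronously by scheduling the send engine, which sees the ERR
 * state and retires the outstanding send WQEs.
 */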
/**
 * qib_error_qp - put a QP into the error state
 * @qp: the QP to put into the error state
 * @err: the receive completion error to signal if an RWQE is active
 *
 * Flushes both send and receive work queues.
 * Returns true if last WQE event should be generated.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 * If we are already in error state, just return.
 */
int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
{
	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	int ret = 0;

	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
		goto bail;

	qp->state = IB_QPS_ERR;

	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
		list_del_init(&qp->iowait);
	}
	spin_unlock(&dev->pending_lock);

	if (!(qp->s_flags & QIB_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			atomic_dec(&qp->s_rdma_mr->refcount);
			qp->s_rdma_mr = NULL;
		}
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
	}

	/* Schedule the sending tasklet to drain the send work queue. */
	if (qp->s_last != qp->s_head)
		qib_schedule_send(qp);

	clear_mr_refs(qp, 0);

	memset(&wc, 0, sizeof(wc));
	wc.qp = &qp->ibqp;
	wc.opcode = IB_WC_RECV;

	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
		wc.wr_id = qp->r_wr_id;
		wc.status = err;
		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
	}
	wc.status = IB_WC_WR_FLUSH_ERR;

	if (qp->r_rq.wq) {
		struct qib_rwq *wq;
		u32 head;
		u32 tail;

		spin_lock(&qp->r_rq.lock);

		/* sanity check pointers before trusting them */
		wq = qp->r_rq.wq;
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		while (tail != head) {
			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
			if (++tail >= qp->r_rq.size)
				tail = 0;
			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
		}
		wq->tail = tail;

		spin_unlock(&qp->r_rq.lock);
	} else if (qp->ibqp.event_handler)
		ret = 1;

bail:
	return ret;
}
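/*
 * The modify-QP path below validates the requested transition with
 * ib_modify_qp_is_ok() while holding both r_lock and s_lock.  Editor's
 * note: the RESET case temporarily drops both locks because it must
 * sleep (cancel_work_sync(), del_timer_sync(), and the wait_event()
 * calls), then reacquires them before resetting the software state.
 */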
/**
 * qib_modify_qp - modify the attributes of a queue pair
 * @ibqp: the queue pair whose attributes we're modifying
 * @attr: the new attributes
 * @attr_mask: the mask of attributes to modify
 * @udata: user data for libibverbs.so
 *
 * Returns 0 on success, otherwise returns an errno.
 */
int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		  int attr_mask, struct ib_udata *udata)
{
	struct qib_ibdev *dev = to_idev(ibqp->device);
	struct qib_qp *qp = to_iqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct ib_event ev;
	int lastwqe = 0;
	int mig = 0;
	int ret;
	u32 pmtu = 0; /* for gcc warning only */

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_lock);

	cur_state = attr_mask & IB_QP_CUR_STATE ?
		attr->cur_qp_state : qp->state;
	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				attr_mask))
		goto inval;

	if (attr_mask & IB_QP_AV) {
		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
			goto inval;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
			goto inval;
		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
			goto inval;
		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
			goto inval;

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		if (attr->min_rnr_timer > 31)
			goto inval;

	if (attr_mask & IB_QP_PORT)
		if (qp->ibqp.qp_type == IB_QPT_SMI ||
		    qp->ibqp.qp_type == IB_QPT_GSI ||
		    attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt)
			goto inval;

	if (attr_mask & IB_QP_DEST_QPN)
		if (attr->dest_qp_num > QIB_QPN_MASK)
			goto inval;

	if (attr_mask & IB_QP_RETRY_CNT)
		if (attr->retry_cnt > 7)
			goto inval;

	if (attr_mask & IB_QP_RNR_RETRY)
		if (attr->rnr_retry > 7)
			goto inval;

	/*
	 * Don't allow invalid path_mtu values.  OK to set greater
	 * than the active mtu (or even the max_cap, if we have tuned
	 * that to a small mtu).  We'll set qp->path_mtu
	 * to the lesser of requested attribute mtu and active,
	 * for packetizing messages.
	 * Note that the QP port has to be set in INIT and MTU in RTR.
	 */
	if (attr_mask & IB_QP_PATH_MTU) {
		struct qib_devdata *dd = dd_from_dev(dev);
		int mtu, pidx = qp->port_num - 1;

		mtu = ib_mtu_enum_to_int(attr->path_mtu);
		if (mtu == -1)
			goto inval;
		if (mtu > dd->pport[pidx].ibmtu) {
			switch (dd->pport[pidx].ibmtu) {
			case 4096:
				pmtu = IB_MTU_4096;
				break;
			case 2048:
				pmtu = IB_MTU_2048;
				break;
			case 1024:
				pmtu = IB_MTU_1024;
				break;
			case 512:
				pmtu = IB_MTU_512;
				break;
			case 256:
				pmtu = IB_MTU_256;
				break;
			default:
				pmtu = IB_MTU_2048;
			}
		} else
			pmtu = attr->path_mtu;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		if (attr->path_mig_state == IB_MIG_REARM) {
			if (qp->s_mig_state == IB_MIG_ARMED)
				goto inval;
			if (new_state != IB_QPS_RTS)
				goto inval;
		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
			if (qp->s_mig_state == IB_MIG_REARM)
				goto inval;
			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
				goto inval;
			if (qp->s_mig_state == IB_MIG_ARMED)
				mig = 1;
		} else
			goto inval;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
			goto inval;

	switch (new_state) {
	case IB_QPS_RESET:
		if (qp->state != IB_QPS_RESET) {
			qp->state = IB_QPS_RESET;
			spin_lock(&dev->pending_lock);
			if (!list_empty(&qp->iowait))
				list_del_init(&qp->iowait);
			spin_unlock(&dev->pending_lock);
			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
			spin_unlock(&qp->s_lock);
			spin_unlock_irq(&qp->r_lock);
			/* Stop the sending work queue and retry timer */
			cancel_work_sync(&qp->s_work);
			del_timer_sync(&qp->s_timer);
			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
			if (qp->s_tx) {
				qib_put_txreq(qp->s_tx);
				qp->s_tx = NULL;
			}
			remove_qp(dev, qp);
			wait_event(qp->wait, !atomic_read(&qp->refcount));
			spin_lock_irq(&qp->r_lock);
			spin_lock(&qp->s_lock);
			clear_mr_refs(qp, 1);
			qib_reset_qp(qp, ibqp->qp_type);
		}
		break;

	case IB_QPS_RTR:
		/* Allow event to retrigger if QP set to RTR more than once */
		qp->r_flags &= ~QIB_R_COMM_EST;
		qp->state = new_state;
		break;

	case IB_QPS_SQD:
		qp->s_draining = qp->s_last != qp->s_cur;
		qp->state = new_state;
		break;

	case IB_QPS_SQE:
		if (qp->ibqp.qp_type == IB_QPT_RC)
			goto inval;
		qp->state = new_state;
		break;

	case IB_QPS_ERR:
		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
		break;

	default:
		qp->state = new_state;
		break;
	}

	if (attr_mask & IB_QP_PKEY_INDEX)
		qp->s_pkey_index = attr->pkey_index;

	if (attr_mask & IB_QP_PORT)
		qp->port_num = attr->port_num;

	if (attr_mask & IB_QP_DEST_QPN)
		qp->remote_qpn = attr->dest_qp_num;

	if (attr_mask & IB_QP_SQ_PSN) {
		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
		qp->s_psn = qp->s_next_psn;
		qp->s_sending_psn = qp->s_next_psn;
		qp->s_last_psn = qp->s_next_psn - 1;
		qp->s_sending_hpsn = qp->s_last_psn;
	}

	if (attr_mask & IB_QP_RQ_PSN)
		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;

	if (attr_mask & IB_QP_ACCESS_FLAGS)
		qp->qp_access_flags = attr->qp_access_flags;

	if (attr_mask & IB_QP_AV) {
		qp->remote_ah_attr = attr->ah_attr;
		qp->s_srate = attr->ah_attr.static_rate;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		qp->alt_ah_attr = attr->alt_ah_attr;
		qp->s_alt_pkey_index = attr->alt_pkey_index;
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE) {
		qp->s_mig_state = attr->path_mig_state;
		if (mig) {
			qp->remote_ah_attr = qp->alt_ah_attr;
			qp->port_num = qp->alt_ah_attr.port_num;
			qp->s_pkey_index = qp->s_alt_pkey_index;
		}
	}

	if (attr_mask & IB_QP_PATH_MTU)
		qp->path_mtu = pmtu;

	if (attr_mask & IB_QP_RETRY_CNT) {
		qp->s_retry_cnt = attr->retry_cnt;
		qp->s_retry = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp->s_rnr_retry_cnt = attr->rnr_retry;
		qp->s_rnr_retry = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER)
		qp->r_min_rnr_timer = attr->min_rnr_timer;

	if (attr_mask & IB_QP_TIMEOUT)
		qp->timeout = attr->timeout;

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
		qp->s_max_rd_atomic = attr->max_rd_atomic;

	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);

	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
		insert_qp(dev, qp);

	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	if (mig) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_PATH_MIG;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
	ret = 0;
	goto bail;

inval:
	spin_unlock(&qp->s_lock);
	spin_unlock_irq(&qp->r_lock);
	ret = -EINVAL;

bail:
	return ret;
}
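/*
 * Editor's note on qib_query_qp() below: the reported capacities are
 * one less than the ring sizes allocated in qib_create_qp(), since one
 * slot is presumably kept unused to distinguish a full ring from an
 * empty one, and sq_psn reflects the next PSN the sender will use.
 */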
int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		 int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct qib_qp *qp = to_iqp(ibqp);

	attr->qp_state = qp->state;
	attr->cur_qp_state = attr->qp_state;
	attr->path_mtu = qp->path_mtu;
	attr->path_mig_state = qp->s_mig_state;
	attr->qkey = qp->qkey;
	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
	attr->dest_qp_num = qp->remote_qpn;
	attr->qp_access_flags = qp->qp_access_flags;
	attr->cap.max_send_wr = qp->s_size - 1;
	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
	attr->cap.max_send_sge = qp->s_max_sge;
	attr->cap.max_recv_sge = qp->r_rq.max_sge;
	attr->cap.max_inline_data = 0;
	attr->ah_attr = qp->remote_ah_attr;
	attr->alt_ah_attr = qp->alt_ah_attr;
	attr->pkey_index = qp->s_pkey_index;
	attr->alt_pkey_index = qp->s_alt_pkey_index;
	attr->en_sqd_async_notify = 0;
	attr->sq_draining = qp->s_draining;
	attr->max_rd_atomic = qp->s_max_rd_atomic;
	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
	attr->min_rnr_timer = qp->r_min_rnr_timer;
	attr->port_num = qp->port_num;
	attr->timeout = qp->timeout;
	attr->retry_cnt = qp->s_retry_cnt;
	attr->rnr_retry = qp->s_rnr_retry_cnt;
	attr->alt_port_num = qp->alt_ah_attr.port_num;
	attr->alt_timeout = qp->alt_timeout;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->cap = attr->cap;
	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
	else
		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->port_num = qp->port_num;
	return 0;
}
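/*
 * Worked example for the credit encoding below (editor's addition):
 * with 100 free RWQEs, the binary search over credit_table[] settles
 * on index 0xD (96 credits), the largest table entry that does not
 * exceed the actual count, so the advertised credit code slightly
 * under-reports the true capacity.
 */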
/**
 * qib_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 qib_compute_aeth(struct qib_qp *qp)
{
	u32 aeth = qp->r_msn & QIB_MSN_MASK;

	if (qp->ibqp.srq) {
		/*
		 * Shared receive queues don't generate credits.
		 * Set the credit field to the invalid value.
		 */
		aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
	} else {
		u32 min, max, x;
		u32 credits;
		struct qib_rwq *wq = qp->r_rq.wq;
		u32 head;
		u32 tail;

		/* sanity check pointers before trusting them */
		head = wq->head;
		if (head >= qp->r_rq.size)
			head = 0;
		tail = wq->tail;
		if (tail >= qp->r_rq.size)
			tail = 0;
		/*
		 * Compute the number of credits available (RWQEs).
		 * XXX Not holding the r_rq.lock here so there is a small
		 * chance that the pair of reads is not atomic.
		 */
		credits = head - tail;
		if ((int)credits < 0)
			credits += qp->r_rq.size;
		/*
		 * Binary search the credit table to find the code to
		 * use.
		 */
		min = 0;
		max = 31;
		for (;;) {
			x = (min + max) / 2;
			if (credit_table[x] == credits)
				break;
			if (credit_table[x] > credits)
				max = x;
			else if (min == x)
				break;
			else
				min = x;
		}
		aeth |= x << QIB_AETH_CREDIT_SHIFT;
	}
	return cpu_to_be32(aeth);
}
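/*
 * Memory layout note for qib_create_qp() (editor's summary): the send
 * work queue is a plain vmalloc() array private to the kernel, while
 * the receive work queue is allocated with vmalloc_user() so it can
 * later be mmap()ed into the owning process via qib_mmap(); see the
 * qib_create_mmap_info() call below.
 */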
/**
 * qib_create_qp - create a queue pair for a device
 * @ibpd: the protection domain whose device we create the queue pair for
 * @init_attr: the attributes of the queue pair
 * @udata: user data for libibverbs.so
 *
 * Returns the queue pair on success, otherwise returns an errno.
 *
 * Called by the ib_create_qp() core verbs function.
 */
struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct qib_qp *qp;
	int err;
	struct qib_swqe *swq = NULL;
	struct qib_ibdev *dev;
	struct qib_devdata *dd;
	size_t sz;
	size_t sg_list_sz;
	struct ib_qp *ret;

	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	/* Check receive queue parameters if no SRQ is specified. */
	if (!init_attr->srq) {
		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		if (init_attr->cap.max_send_sge +
		    init_attr->cap.max_send_wr +
		    init_attr->cap.max_recv_sge +
		    init_attr->cap.max_recv_wr == 0) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
	}

	switch (init_attr->qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > ibpd->device->phys_port_cnt) {
			ret = ERR_PTR(-EINVAL);
			goto bail;
		}
		/* fall through: SMI/GSI QPs share the setup below */
	case IB_QPT_UC:
	case IB_QPT_RC:
	case IB_QPT_UD:
		sz = sizeof(struct qib_sge) *
			init_attr->cap.max_send_sge +
			sizeof(struct qib_swqe);
		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
		if (swq == NULL) {
			ret = ERR_PTR(-ENOMEM);
			goto bail;
		}
		sz = sizeof(*qp);
		sg_list_sz = 0;
		if (init_attr->srq) {
			struct qib_srq *srq = to_isrq(init_attr->srq);

			if (srq->rq.max_sge > 1)
				sg_list_sz = sizeof(*qp->r_sg_list) *
					(srq->rq.max_sge - 1);
		} else if (init_attr->cap.max_recv_sge > 1)
			sg_list_sz = sizeof(*qp->r_sg_list) *
				(init_attr->cap.max_recv_sge - 1);
		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
		if (!qp) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_swq;
		}
		if (init_attr->srq)
			sz = 0;
		else {
			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
				sizeof(struct qib_rwqe);
			qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
						   qp->r_rq.size * sz);
			if (!qp->r_rq.wq) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_qp;
			}
		}

		/*
		 * ib_create_qp() will initialize qp->ibqp
		 * except for qp->ibqp.qp_num.
		 */
		spin_lock_init(&qp->r_lock);
		spin_lock_init(&qp->s_lock);
		spin_lock_init(&qp->r_rq.lock);
		atomic_set(&qp->refcount, 0);
		init_waitqueue_head(&qp->wait);
		init_waitqueue_head(&qp->wait_dma);
		init_timer(&qp->s_timer);
		qp->s_timer.data = (unsigned long)qp;
		INIT_WORK(&qp->s_work, qib_do_send);
		INIT_LIST_HEAD(&qp->iowait);
		INIT_LIST_HEAD(&qp->rspwait);
		qp->state = IB_QPS_RESET;
		qp->s_wq = swq;
		qp->s_size = init_attr->cap.max_send_wr + 1;
		qp->s_max_sge = init_attr->cap.max_send_sge;
		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
		dev = to_idev(ibpd->device);
		dd = dd_from_dev(dev);
		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
				init_attr->port_num);
		if (err < 0) {
			ret = ERR_PTR(err);
			vfree(qp->r_rq.wq);
			goto bail_qp;
		}
		qp->ibqp.qp_num = err;
		qp->port_num = init_attr->port_num;
		qp->processor_id = smp_processor_id();
		qib_reset_qp(qp, init_attr->qp_type);
		break;

	default:
		/* Don't support raw QPs */
		ret = ERR_PTR(-ENOSYS);
		goto bail;
	}

	init_attr->cap.max_inline_data = 0;

	/*
	 * Return the address of the RWQ as the offset to mmap.
	 * See qib_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		if (!qp->r_rq.wq) {
			__u64 offset = 0;

			err = ib_copy_to_udata(udata, &offset,
					       sizeof(offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		} else {
			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;

			qp->ip = qib_create_mmap_info(dev, s,
						      ibpd->uobject->context,
						      qp->r_rq.wq);
			if (!qp->ip) {
				ret = ERR_PTR(-ENOMEM);
				goto bail_ip;
			}

			err = ib_copy_to_udata(udata, &(qp->ip->offset),
					       sizeof(qp->ip->offset));
			if (err) {
				ret = ERR_PTR(err);
				goto bail_ip;
			}
		}
	}

	spin_lock(&dev->n_qps_lock);
	if (dev->n_qps_allocated == ib_qib_max_qps) {
		spin_unlock(&dev->n_qps_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_qps_allocated++;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = &qp->ibqp;
	goto bail;

bail_ip:
	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
bail_qp:
	kfree(qp);
bail_swq:
	vfree(swq);
bail:
	return ret;
}
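/*
 * Lifetime note (editor's addition): when the RWQ was exported to user
 * space, qp->ip holds a kref tied to any outstanding mmap()s; the
 * destroy path below therefore drops that reference with kref_put()
 * rather than freeing the RWQ directly, and qib_release_mmap_info()
 * frees the memory once the last mapping goes away.
 */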
/**
 * qib_destroy_qp - destroy a queue pair
 * @ibqp: the queue pair to destroy
 *
 * Returns 0 on success.
 *
 * Note that this can be called while the QP is actively sending or
 * receiving!
 */
int qib_destroy_qp(struct ib_qp *ibqp)
{
	struct qib_qp *qp = to_iqp(ibqp);
	struct qib_ibdev *dev = to_idev(ibqp->device);

	/* Make sure HW and driver activity is stopped. */
	spin_lock_irq(&qp->s_lock);
	if (qp->state != IB_QPS_RESET) {
		qp->state = IB_QPS_RESET;
		spin_lock(&dev->pending_lock);
		if (!list_empty(&qp->iowait))
			list_del_init(&qp->iowait);
		spin_unlock(&dev->pending_lock);
		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
		spin_unlock_irq(&qp->s_lock);
		cancel_work_sync(&qp->s_work);
		del_timer_sync(&qp->s_timer);
		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
		if (qp->s_tx) {
			qib_put_txreq(qp->s_tx);
			qp->s_tx = NULL;
		}
		remove_qp(dev, qp);
		wait_event(qp->wait, !atomic_read(&qp->refcount));
		clear_mr_refs(qp, 1);
	} else
		spin_unlock_irq(&qp->s_lock);

	/* all users cleaned up, mark it available */
	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
	spin_lock(&dev->n_qps_lock);
	dev->n_qps_allocated--;
	spin_unlock(&dev->n_qps_lock);

	if (qp->ip)
		kref_put(&qp->ip->ref, qib_release_mmap_info);
	else
		vfree(qp->r_rq.wq);
	vfree(qp->s_wq);
	kfree(qp);
	return 0;
}
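/*
 * Editor's note: qpt->last starts at 1 so the first QPN handed out by
 * alloc_qpn() is 2; QPNs 0 and 1 are reserved for the SMI and GSI
 * special QPs, which are tracked via qpt->flags instead of the bitmap.
 */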
/**
 * qib_init_qpn_table - initialize the QP number table for a device
 * @dd: the device data structure
 * @qpt: the QPN table
 */
void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
{
	spin_lock_init(&qpt->lock);
	qpt->last = 1;          /* start with QPN 2 */
	qpt->nmaps = 1;
	qpt->mask = dd->qpn_mask;
}

/**
 * qib_free_qpn_table - free the QP number table for a device
 * @qpt: the QPN table
 */
void qib_free_qpn_table(struct qib_qpn_table *qpt)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
		if (qpt->map[i].page)
			free_page((unsigned long) qpt->map[i].page);
}

/**
 * qib_get_credit - handle a credit update from an incoming AETH
 * @qp: the QP whose send state to update
 * @aeth: the Acknowledge Extended Transport Header
 *
 * The QP s_lock should be held.
 */
void qib_get_credit(struct qib_qp *qp, u32 aeth)
{
	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;

	/*
	 * If the credit is invalid, we can send
	 * as many packets as we like.  Otherwise, we have to
	 * honor the credit field.
	 */
	if (credit == QIB_AETH_CREDIT_INVAL) {
		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
		/* Compute new LSN (i.e., MSN + credit) */
		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
		if (qib_cmp24(credit, qp->s_lsn) > 0) {
			qp->s_lsn = credit;
			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
				qib_schedule_send(qp);
			}
		}
	}
}