1 /* 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include <rdma/ib_mad.h> 36 #include <rdma/ib_user_verbs.h> 37 #include <linux/io.h> 38 #include <linux/module.h> 39 #include <linux/utsname.h> 40 #include <linux/rculist.h> 41 #include <linux/mm.h> 42 #include <linux/random.h> 43 #include <linux/vmalloc.h> 44 #include <rdma/rdma_vt.h> 45 46 #include "qib.h" 47 #include "qib_common.h" 48 49 static unsigned int ib_qib_qp_table_size = 256; 50 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); 51 MODULE_PARM_DESC(qp_table_size, "QP table size"); 52 53 static unsigned int qib_lkey_table_size = 16; 54 module_param_named(lkey_table_size, qib_lkey_table_size, uint, 55 S_IRUGO); 56 MODULE_PARM_DESC(lkey_table_size, 57 "LKEY table size in bits (2^n, 1 <= n <= 23)"); 58 59 static unsigned int ib_qib_max_pds = 0xFFFF; 60 module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO); 61 MODULE_PARM_DESC(max_pds, 62 "Maximum number of protection domains to support"); 63 64 static unsigned int ib_qib_max_ahs = 0xFFFF; 65 module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO); 66 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); 67 68 unsigned int ib_qib_max_cqes = 0x2FFFF; 69 module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO); 70 MODULE_PARM_DESC(max_cqes, 71 "Maximum number of completion queue entries to support"); 72 73 unsigned int ib_qib_max_cqs = 0x1FFFF; 74 module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO); 75 MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); 76 77 unsigned int ib_qib_max_qp_wrs = 0x3FFF; 78 module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO); 79 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); 80 81 unsigned int ib_qib_max_qps = 16384; 82 module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO); 83 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); 84 85 unsigned int ib_qib_max_sges = 0x60; 86 module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO); 87 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); 88 89 unsigned int ib_qib_max_mcast_grps = 16384; 90 module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO); 91 MODULE_PARM_DESC(max_mcast_grps, 92 "Maximum number of multicast groups to support"); 93 94 unsigned int ib_qib_max_mcast_qp_attached = 16; 95 module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached, 96 uint, S_IRUGO); 97 MODULE_PARM_DESC(max_mcast_qp_attached, 98 "Maximum number of attached QPs to support"); 99 100 unsigned int ib_qib_max_srqs = 1024; 101 module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO); 102 MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); 103 104 unsigned int ib_qib_max_srq_sges = 128; 105 module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO); 106 MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); 107 108 unsigned int ib_qib_max_srq_wrs = 0x1FFFF; 109 module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO); 110 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); 111 112 static unsigned int ib_qib_disable_sma; 113 module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); 114 MODULE_PARM_DESC(disable_sma, "Disable the SMA"); 115 116 /* 117 * System image GUID. 118 */ 119 __be64 ib_qib_sys_image_guid; 120 121 /** 122 * qib_copy_sge - copy data to SGE memory 123 * @ss: the SGE state 124 * @data: the data to copy 125 * @length: the length of the data 126 */ 127 void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release) 128 { 129 struct rvt_sge *sge = &ss->sge; 130 131 while (length) { 132 u32 len = rvt_get_sge_length(sge, length); 133 134 WARN_ON_ONCE(len == 0); 135 memcpy(sge->vaddr, data, len); 136 rvt_update_sge(ss, len, release); 137 data += len; 138 length -= len; 139 } 140 } 141 142 /* 143 * Count the number of DMA descriptors needed to send length bytes of data. 144 * Don't modify the qib_sge_state to get the count. 145 * Return zero if any of the segments is not aligned. 146 */ 147 static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) 148 { 149 struct rvt_sge *sg_list = ss->sg_list; 150 struct rvt_sge sge = ss->sge; 151 u8 num_sge = ss->num_sge; 152 u32 ndesc = 1; /* count the header */ 153 154 while (length) { 155 u32 len = sge.length; 156 157 if (len > length) 158 len = length; 159 if (len > sge.sge_length) 160 len = sge.sge_length; 161 BUG_ON(len == 0); 162 if (((long) sge.vaddr & (sizeof(u32) - 1)) || 163 (len != length && (len & (sizeof(u32) - 1)))) { 164 ndesc = 0; 165 break; 166 } 167 ndesc++; 168 sge.vaddr += len; 169 sge.length -= len; 170 sge.sge_length -= len; 171 if (sge.sge_length == 0) { 172 if (--num_sge) 173 sge = *sg_list++; 174 } else if (sge.length == 0 && sge.mr->lkey) { 175 if (++sge.n >= RVT_SEGSZ) { 176 if (++sge.m >= sge.mr->mapsz) 177 break; 178 sge.n = 0; 179 } 180 sge.vaddr = 181 sge.mr->map[sge.m]->segs[sge.n].vaddr; 182 sge.length = 183 sge.mr->map[sge.m]->segs[sge.n].length; 184 } 185 length -= len; 186 } 187 return ndesc; 188 } 189 190 /* 191 * Copy from the SGEs to the data buffer. 192 */ 193 static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) 194 { 195 struct rvt_sge *sge = &ss->sge; 196 197 while (length) { 198 u32 len = sge->length; 199 200 if (len > length) 201 len = length; 202 if (len > sge->sge_length) 203 len = sge->sge_length; 204 BUG_ON(len == 0); 205 memcpy(data, sge->vaddr, len); 206 sge->vaddr += len; 207 sge->length -= len; 208 sge->sge_length -= len; 209 if (sge->sge_length == 0) { 210 if (--ss->num_sge) 211 *sge = *ss->sg_list++; 212 } else if (sge->length == 0 && sge->mr->lkey) { 213 if (++sge->n >= RVT_SEGSZ) { 214 if (++sge->m >= sge->mr->mapsz) 215 break; 216 sge->n = 0; 217 } 218 sge->vaddr = 219 sge->mr->map[sge->m]->segs[sge->n].vaddr; 220 sge->length = 221 sge->mr->map[sge->m]->segs[sge->n].length; 222 } 223 data += len; 224 length -= len; 225 } 226 } 227 228 /** 229 * qib_qp_rcv - processing an incoming packet on a QP 230 * @rcd: the context pointer 231 * @hdr: the packet header 232 * @has_grh: true if the packet has a GRH 233 * @data: the packet data 234 * @tlen: the packet length 235 * @qp: the QP the packet came on 236 * 237 * This is called from qib_ib_rcv() to process an incoming packet 238 * for the given QP. 239 * Called at interrupt level. 240 */ 241 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, 242 int has_grh, void *data, u32 tlen, struct rvt_qp *qp) 243 { 244 struct qib_ibport *ibp = &rcd->ppd->ibport_data; 245 246 spin_lock(&qp->r_lock); 247 248 /* Check for valid receive state. */ 249 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { 250 ibp->rvp.n_pkt_drops++; 251 goto unlock; 252 } 253 254 switch (qp->ibqp.qp_type) { 255 case IB_QPT_SMI: 256 case IB_QPT_GSI: 257 if (ib_qib_disable_sma) 258 break; 259 /* FALLTHROUGH */ 260 case IB_QPT_UD: 261 qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp); 262 break; 263 264 case IB_QPT_RC: 265 qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp); 266 break; 267 268 case IB_QPT_UC: 269 qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp); 270 break; 271 272 default: 273 break; 274 } 275 276 unlock: 277 spin_unlock(&qp->r_lock); 278 } 279 280 /** 281 * qib_ib_rcv - process an incoming packet 282 * @rcd: the context pointer 283 * @rhdr: the header of the packet 284 * @data: the packet payload 285 * @tlen: the packet length 286 * 287 * This is called from qib_kreceive() to process an incoming packet at 288 * interrupt level. Tlen is the length of the header + data + CRC in bytes. 289 */ 290 void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) 291 { 292 struct qib_pportdata *ppd = rcd->ppd; 293 struct qib_ibport *ibp = &ppd->ibport_data; 294 struct ib_header *hdr = rhdr; 295 struct qib_devdata *dd = ppd->dd; 296 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 297 struct ib_other_headers *ohdr; 298 struct rvt_qp *qp; 299 u32 qp_num; 300 int lnh; 301 u8 opcode; 302 u16 lid; 303 304 /* 24 == LRH+BTH+CRC */ 305 if (unlikely(tlen < 24)) 306 goto drop; 307 308 /* Check for a valid destination LID (see ch. 7.11.1). */ 309 lid = be16_to_cpu(hdr->lrh[1]); 310 if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { 311 lid &= ~((1 << ppd->lmc) - 1); 312 if (unlikely(lid != ppd->lid)) 313 goto drop; 314 } 315 316 /* Check for GRH */ 317 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 318 if (lnh == QIB_LRH_BTH) 319 ohdr = &hdr->u.oth; 320 else if (lnh == QIB_LRH_GRH) { 321 u32 vtf; 322 323 ohdr = &hdr->u.l.oth; 324 if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) 325 goto drop; 326 vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); 327 if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) 328 goto drop; 329 } else 330 goto drop; 331 332 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; 333 #ifdef CONFIG_DEBUG_FS 334 rcd->opstats->stats[opcode].n_bytes += tlen; 335 rcd->opstats->stats[opcode].n_packets++; 336 #endif 337 338 /* Get the destination QP number. */ 339 qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; 340 if (qp_num == QIB_MULTICAST_QPN) { 341 struct rvt_mcast *mcast; 342 struct rvt_mcast_qp *p; 343 344 if (lnh != QIB_LRH_GRH) 345 goto drop; 346 mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); 347 if (mcast == NULL) 348 goto drop; 349 this_cpu_inc(ibp->pmastats->n_multicast_rcv); 350 list_for_each_entry_rcu(p, &mcast->qp_list, list) 351 qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); 352 /* 353 * Notify rvt_multicast_detach() if it is waiting for us 354 * to finish. 355 */ 356 if (atomic_dec_return(&mcast->refcount) <= 1) 357 wake_up(&mcast->wait); 358 } else { 359 rcu_read_lock(); 360 qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); 361 if (!qp) { 362 rcu_read_unlock(); 363 goto drop; 364 } 365 this_cpu_inc(ibp->pmastats->n_unicast_rcv); 366 qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); 367 rcu_read_unlock(); 368 } 369 return; 370 371 drop: 372 ibp->rvp.n_pkt_drops++; 373 } 374 375 /* 376 * This is called from a timer to check for QPs 377 * which need kernel memory in order to send a packet. 378 */ 379 static void mem_timer(unsigned long data) 380 { 381 struct qib_ibdev *dev = (struct qib_ibdev *) data; 382 struct list_head *list = &dev->memwait; 383 struct rvt_qp *qp = NULL; 384 struct qib_qp_priv *priv = NULL; 385 unsigned long flags; 386 387 spin_lock_irqsave(&dev->rdi.pending_lock, flags); 388 if (!list_empty(list)) { 389 priv = list_entry(list->next, struct qib_qp_priv, iowait); 390 qp = priv->owner; 391 list_del_init(&priv->iowait); 392 rvt_get_qp(qp); 393 if (!list_empty(list)) 394 mod_timer(&dev->mem_timer, jiffies + 1); 395 } 396 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 397 398 if (qp) { 399 spin_lock_irqsave(&qp->s_lock, flags); 400 if (qp->s_flags & RVT_S_WAIT_KMEM) { 401 qp->s_flags &= ~RVT_S_WAIT_KMEM; 402 qib_schedule_send(qp); 403 } 404 spin_unlock_irqrestore(&qp->s_lock, flags); 405 rvt_put_qp(qp); 406 } 407 } 408 409 #ifdef __LITTLE_ENDIAN 410 static inline u32 get_upper_bits(u32 data, u32 shift) 411 { 412 return data >> shift; 413 } 414 415 static inline u32 set_upper_bits(u32 data, u32 shift) 416 { 417 return data << shift; 418 } 419 420 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 421 { 422 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); 423 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 424 return data; 425 } 426 #else 427 static inline u32 get_upper_bits(u32 data, u32 shift) 428 { 429 return data << shift; 430 } 431 432 static inline u32 set_upper_bits(u32 data, u32 shift) 433 { 434 return data >> shift; 435 } 436 437 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 438 { 439 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); 440 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 441 return data; 442 } 443 #endif 444 445 static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, 446 u32 length, unsigned flush_wc) 447 { 448 u32 extra = 0; 449 u32 data = 0; 450 u32 last; 451 452 while (1) { 453 u32 len = ss->sge.length; 454 u32 off; 455 456 if (len > length) 457 len = length; 458 if (len > ss->sge.sge_length) 459 len = ss->sge.sge_length; 460 BUG_ON(len == 0); 461 /* If the source address is not aligned, try to align it. */ 462 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); 463 if (off) { 464 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & 465 ~(sizeof(u32) - 1)); 466 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); 467 u32 y; 468 469 y = sizeof(u32) - off; 470 if (len > y) 471 len = y; 472 if (len + extra >= sizeof(u32)) { 473 data |= set_upper_bits(v, extra * 474 BITS_PER_BYTE); 475 len = sizeof(u32) - extra; 476 if (len == length) { 477 last = data; 478 break; 479 } 480 __raw_writel(data, piobuf); 481 piobuf++; 482 extra = 0; 483 data = 0; 484 } else { 485 /* Clear unused upper bytes */ 486 data |= clear_upper_bytes(v, len, extra); 487 if (len == length) { 488 last = data; 489 break; 490 } 491 extra += len; 492 } 493 } else if (extra) { 494 /* Source address is aligned. */ 495 u32 *addr = (u32 *) ss->sge.vaddr; 496 int shift = extra * BITS_PER_BYTE; 497 int ushift = 32 - shift; 498 u32 l = len; 499 500 while (l >= sizeof(u32)) { 501 u32 v = *addr; 502 503 data |= set_upper_bits(v, shift); 504 __raw_writel(data, piobuf); 505 data = get_upper_bits(v, ushift); 506 piobuf++; 507 addr++; 508 l -= sizeof(u32); 509 } 510 /* 511 * We still have 'extra' number of bytes leftover. 512 */ 513 if (l) { 514 u32 v = *addr; 515 516 if (l + extra >= sizeof(u32)) { 517 data |= set_upper_bits(v, shift); 518 len -= l + extra - sizeof(u32); 519 if (len == length) { 520 last = data; 521 break; 522 } 523 __raw_writel(data, piobuf); 524 piobuf++; 525 extra = 0; 526 data = 0; 527 } else { 528 /* Clear unused upper bytes */ 529 data |= clear_upper_bytes(v, l, extra); 530 if (len == length) { 531 last = data; 532 break; 533 } 534 extra += l; 535 } 536 } else if (len == length) { 537 last = data; 538 break; 539 } 540 } else if (len == length) { 541 u32 w; 542 543 /* 544 * Need to round up for the last dword in the 545 * packet. 546 */ 547 w = (len + 3) >> 2; 548 qib_pio_copy(piobuf, ss->sge.vaddr, w - 1); 549 piobuf += w - 1; 550 last = ((u32 *) ss->sge.vaddr)[w - 1]; 551 break; 552 } else { 553 u32 w = len >> 2; 554 555 qib_pio_copy(piobuf, ss->sge.vaddr, w); 556 piobuf += w; 557 558 extra = len & (sizeof(u32) - 1); 559 if (extra) { 560 u32 v = ((u32 *) ss->sge.vaddr)[w]; 561 562 /* Clear unused upper bytes */ 563 data = clear_upper_bytes(v, extra, 0); 564 } 565 } 566 rvt_update_sge(ss, len, false); 567 length -= len; 568 } 569 /* Update address before sending packet. */ 570 rvt_update_sge(ss, length, false); 571 if (flush_wc) { 572 /* must flush early everything before trigger word */ 573 qib_flush_wc(); 574 __raw_writel(last, piobuf); 575 /* be sure trigger word is written */ 576 qib_flush_wc(); 577 } else 578 __raw_writel(last, piobuf); 579 } 580 581 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, 582 struct rvt_qp *qp) 583 { 584 struct qib_qp_priv *priv = qp->priv; 585 struct qib_verbs_txreq *tx; 586 unsigned long flags; 587 588 spin_lock_irqsave(&qp->s_lock, flags); 589 spin_lock(&dev->rdi.pending_lock); 590 591 if (!list_empty(&dev->txreq_free)) { 592 struct list_head *l = dev->txreq_free.next; 593 594 list_del(l); 595 spin_unlock(&dev->rdi.pending_lock); 596 spin_unlock_irqrestore(&qp->s_lock, flags); 597 tx = list_entry(l, struct qib_verbs_txreq, txreq.list); 598 } else { 599 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK && 600 list_empty(&priv->iowait)) { 601 dev->n_txwait++; 602 qp->s_flags |= RVT_S_WAIT_TX; 603 list_add_tail(&priv->iowait, &dev->txwait); 604 } 605 qp->s_flags &= ~RVT_S_BUSY; 606 spin_unlock(&dev->rdi.pending_lock); 607 spin_unlock_irqrestore(&qp->s_lock, flags); 608 tx = ERR_PTR(-EBUSY); 609 } 610 return tx; 611 } 612 613 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, 614 struct rvt_qp *qp) 615 { 616 struct qib_verbs_txreq *tx; 617 unsigned long flags; 618 619 spin_lock_irqsave(&dev->rdi.pending_lock, flags); 620 /* assume the list non empty */ 621 if (likely(!list_empty(&dev->txreq_free))) { 622 struct list_head *l = dev->txreq_free.next; 623 624 list_del(l); 625 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 626 tx = list_entry(l, struct qib_verbs_txreq, txreq.list); 627 } else { 628 /* call slow path to get the extra lock */ 629 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 630 tx = __get_txreq(dev, qp); 631 } 632 return tx; 633 } 634 635 void qib_put_txreq(struct qib_verbs_txreq *tx) 636 { 637 struct qib_ibdev *dev; 638 struct rvt_qp *qp; 639 struct qib_qp_priv *priv; 640 unsigned long flags; 641 642 qp = tx->qp; 643 dev = to_idev(qp->ibqp.device); 644 645 if (tx->mr) { 646 rvt_put_mr(tx->mr); 647 tx->mr = NULL; 648 } 649 if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { 650 tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF; 651 dma_unmap_single(&dd_from_dev(dev)->pcidev->dev, 652 tx->txreq.addr, tx->hdr_dwords << 2, 653 DMA_TO_DEVICE); 654 kfree(tx->align_buf); 655 } 656 657 spin_lock_irqsave(&dev->rdi.pending_lock, flags); 658 659 /* Put struct back on free list */ 660 list_add(&tx->txreq.list, &dev->txreq_free); 661 662 if (!list_empty(&dev->txwait)) { 663 /* Wake up first QP wanting a free struct */ 664 priv = list_entry(dev->txwait.next, struct qib_qp_priv, 665 iowait); 666 qp = priv->owner; 667 list_del_init(&priv->iowait); 668 rvt_get_qp(qp); 669 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 670 671 spin_lock_irqsave(&qp->s_lock, flags); 672 if (qp->s_flags & RVT_S_WAIT_TX) { 673 qp->s_flags &= ~RVT_S_WAIT_TX; 674 qib_schedule_send(qp); 675 } 676 spin_unlock_irqrestore(&qp->s_lock, flags); 677 678 rvt_put_qp(qp); 679 } else 680 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 681 } 682 683 /* 684 * This is called when there are send DMA descriptors that might be 685 * available. 686 * 687 * This is called with ppd->sdma_lock held. 688 */ 689 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) 690 { 691 struct rvt_qp *qp, *nqp; 692 struct qib_qp_priv *qpp, *nqpp; 693 struct rvt_qp *qps[20]; 694 struct qib_ibdev *dev; 695 unsigned i, n; 696 697 n = 0; 698 dev = &ppd->dd->verbs_dev; 699 spin_lock(&dev->rdi.pending_lock); 700 701 /* Search wait list for first QP wanting DMA descriptors. */ 702 list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) { 703 qp = qpp->owner; 704 nqp = nqpp->owner; 705 if (qp->port_num != ppd->port) 706 continue; 707 if (n == ARRAY_SIZE(qps)) 708 break; 709 if (qpp->s_tx->txreq.sg_count > avail) 710 break; 711 avail -= qpp->s_tx->txreq.sg_count; 712 list_del_init(&qpp->iowait); 713 rvt_get_qp(qp); 714 qps[n++] = qp; 715 } 716 717 spin_unlock(&dev->rdi.pending_lock); 718 719 for (i = 0; i < n; i++) { 720 qp = qps[i]; 721 spin_lock(&qp->s_lock); 722 if (qp->s_flags & RVT_S_WAIT_DMA_DESC) { 723 qp->s_flags &= ~RVT_S_WAIT_DMA_DESC; 724 qib_schedule_send(qp); 725 } 726 spin_unlock(&qp->s_lock); 727 rvt_put_qp(qp); 728 } 729 } 730 731 /* 732 * This is called with ppd->sdma_lock held. 733 */ 734 static void sdma_complete(struct qib_sdma_txreq *cookie, int status) 735 { 736 struct qib_verbs_txreq *tx = 737 container_of(cookie, struct qib_verbs_txreq, txreq); 738 struct rvt_qp *qp = tx->qp; 739 struct qib_qp_priv *priv = qp->priv; 740 741 spin_lock(&qp->s_lock); 742 if (tx->wqe) 743 qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 744 else if (qp->ibqp.qp_type == IB_QPT_RC) { 745 struct ib_header *hdr; 746 747 if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) 748 hdr = &tx->align_buf->hdr; 749 else { 750 struct qib_ibdev *dev = to_idev(qp->ibqp.device); 751 752 hdr = &dev->pio_hdrs[tx->hdr_inx].hdr; 753 } 754 qib_rc_send_complete(qp, hdr); 755 } 756 if (atomic_dec_and_test(&priv->s_dma_busy)) { 757 if (qp->state == IB_QPS_RESET) 758 wake_up(&priv->wait_dma); 759 else if (qp->s_flags & RVT_S_WAIT_DMA) { 760 qp->s_flags &= ~RVT_S_WAIT_DMA; 761 qib_schedule_send(qp); 762 } 763 } 764 spin_unlock(&qp->s_lock); 765 766 qib_put_txreq(tx); 767 } 768 769 static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) 770 { 771 struct qib_qp_priv *priv = qp->priv; 772 unsigned long flags; 773 int ret = 0; 774 775 spin_lock_irqsave(&qp->s_lock, flags); 776 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 777 spin_lock(&dev->rdi.pending_lock); 778 if (list_empty(&priv->iowait)) { 779 if (list_empty(&dev->memwait)) 780 mod_timer(&dev->mem_timer, jiffies + 1); 781 qp->s_flags |= RVT_S_WAIT_KMEM; 782 list_add_tail(&priv->iowait, &dev->memwait); 783 } 784 spin_unlock(&dev->rdi.pending_lock); 785 qp->s_flags &= ~RVT_S_BUSY; 786 ret = -EBUSY; 787 } 788 spin_unlock_irqrestore(&qp->s_lock, flags); 789 790 return ret; 791 } 792 793 static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, 794 u32 hdrwords, struct rvt_sge_state *ss, u32 len, 795 u32 plen, u32 dwords) 796 { 797 struct qib_qp_priv *priv = qp->priv; 798 struct qib_ibdev *dev = to_idev(qp->ibqp.device); 799 struct qib_devdata *dd = dd_from_dev(dev); 800 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 801 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 802 struct qib_verbs_txreq *tx; 803 struct qib_pio_header *phdr; 804 u32 control; 805 u32 ndesc; 806 int ret; 807 808 tx = priv->s_tx; 809 if (tx) { 810 priv->s_tx = NULL; 811 /* resend previously constructed packet */ 812 ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx); 813 goto bail; 814 } 815 816 tx = get_txreq(dev, qp); 817 if (IS_ERR(tx)) 818 goto bail_tx; 819 820 control = dd->f_setpbc_control(ppd, plen, qp->s_srate, 821 be16_to_cpu(hdr->lrh[0]) >> 12); 822 tx->qp = qp; 823 tx->wqe = qp->s_wqe; 824 tx->mr = qp->s_rdma_mr; 825 if (qp->s_rdma_mr) 826 qp->s_rdma_mr = NULL; 827 tx->txreq.callback = sdma_complete; 828 if (dd->flags & QIB_HAS_SDMA_TIMEOUT) 829 tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST; 830 else 831 tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ; 832 if (plen + 1 > dd->piosize2kmax_dwords) 833 tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF; 834 835 if (len) { 836 /* 837 * Don't try to DMA if it takes more descriptors than 838 * the queue holds. 839 */ 840 ndesc = qib_count_sge(ss, len); 841 if (ndesc >= ppd->sdma_descq_cnt) 842 ndesc = 0; 843 } else 844 ndesc = 1; 845 if (ndesc) { 846 phdr = &dev->pio_hdrs[tx->hdr_inx]; 847 phdr->pbc[0] = cpu_to_le32(plen); 848 phdr->pbc[1] = cpu_to_le32(control); 849 memcpy(&phdr->hdr, hdr, hdrwords << 2); 850 tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC; 851 tx->txreq.sg_count = ndesc; 852 tx->txreq.addr = dev->pio_hdrs_phys + 853 tx->hdr_inx * sizeof(struct qib_pio_header); 854 tx->hdr_dwords = hdrwords + 2; /* add PBC length */ 855 ret = qib_sdma_verbs_send(ppd, ss, dwords, tx); 856 goto bail; 857 } 858 859 /* Allocate a buffer and copy the header and payload to it. */ 860 tx->hdr_dwords = plen + 1; 861 phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC); 862 if (!phdr) 863 goto err_tx; 864 phdr->pbc[0] = cpu_to_le32(plen); 865 phdr->pbc[1] = cpu_to_le32(control); 866 memcpy(&phdr->hdr, hdr, hdrwords << 2); 867 qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len); 868 869 tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr, 870 tx->hdr_dwords << 2, DMA_TO_DEVICE); 871 if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr)) 872 goto map_err; 873 tx->align_buf = phdr; 874 tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF; 875 tx->txreq.sg_count = 1; 876 ret = qib_sdma_verbs_send(ppd, NULL, 0, tx); 877 goto unaligned; 878 879 map_err: 880 kfree(phdr); 881 err_tx: 882 qib_put_txreq(tx); 883 ret = wait_kmem(dev, qp); 884 unaligned: 885 ibp->rvp.n_unaligned++; 886 bail: 887 return ret; 888 bail_tx: 889 ret = PTR_ERR(tx); 890 goto bail; 891 } 892 893 /* 894 * If we are now in the error state, return zero to flush the 895 * send work request. 896 */ 897 static int no_bufs_available(struct rvt_qp *qp) 898 { 899 struct qib_qp_priv *priv = qp->priv; 900 struct qib_ibdev *dev = to_idev(qp->ibqp.device); 901 struct qib_devdata *dd; 902 unsigned long flags; 903 int ret = 0; 904 905 /* 906 * Note that as soon as want_buffer() is called and 907 * possibly before it returns, qib_ib_piobufavail() 908 * could be called. Therefore, put QP on the I/O wait list before 909 * enabling the PIO avail interrupt. 910 */ 911 spin_lock_irqsave(&qp->s_lock, flags); 912 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 913 spin_lock(&dev->rdi.pending_lock); 914 if (list_empty(&priv->iowait)) { 915 dev->n_piowait++; 916 qp->s_flags |= RVT_S_WAIT_PIO; 917 list_add_tail(&priv->iowait, &dev->piowait); 918 dd = dd_from_dev(dev); 919 dd->f_wantpiobuf_intr(dd, 1); 920 } 921 spin_unlock(&dev->rdi.pending_lock); 922 qp->s_flags &= ~RVT_S_BUSY; 923 ret = -EBUSY; 924 } 925 spin_unlock_irqrestore(&qp->s_lock, flags); 926 return ret; 927 } 928 929 static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, 930 u32 hdrwords, struct rvt_sge_state *ss, u32 len, 931 u32 plen, u32 dwords) 932 { 933 struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); 934 struct qib_pportdata *ppd = dd->pport + qp->port_num - 1; 935 u32 *hdr = (u32 *) ibhdr; 936 u32 __iomem *piobuf_orig; 937 u32 __iomem *piobuf; 938 u64 pbc; 939 unsigned long flags; 940 unsigned flush_wc; 941 u32 control; 942 u32 pbufn; 943 944 control = dd->f_setpbc_control(ppd, plen, qp->s_srate, 945 be16_to_cpu(ibhdr->lrh[0]) >> 12); 946 pbc = ((u64) control << 32) | plen; 947 piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn); 948 if (unlikely(piobuf == NULL)) 949 return no_bufs_available(qp); 950 951 /* 952 * Write the pbc. 953 * We have to flush after the PBC for correctness on some cpus 954 * or WC buffer can be written out of order. 955 */ 956 writeq(pbc, piobuf); 957 piobuf_orig = piobuf; 958 piobuf += 2; 959 960 flush_wc = dd->flags & QIB_PIO_FLUSH_WC; 961 if (len == 0) { 962 /* 963 * If there is just the header portion, must flush before 964 * writing last word of header for correctness, and after 965 * the last header word (trigger word). 966 */ 967 if (flush_wc) { 968 qib_flush_wc(); 969 qib_pio_copy(piobuf, hdr, hdrwords - 1); 970 qib_flush_wc(); 971 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); 972 qib_flush_wc(); 973 } else 974 qib_pio_copy(piobuf, hdr, hdrwords); 975 goto done; 976 } 977 978 if (flush_wc) 979 qib_flush_wc(); 980 qib_pio_copy(piobuf, hdr, hdrwords); 981 piobuf += hdrwords; 982 983 /* The common case is aligned and contained in one segment. */ 984 if (likely(ss->num_sge == 1 && len <= ss->sge.length && 985 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { 986 u32 *addr = (u32 *) ss->sge.vaddr; 987 988 /* Update address before sending packet. */ 989 rvt_update_sge(ss, len, false); 990 if (flush_wc) { 991 qib_pio_copy(piobuf, addr, dwords - 1); 992 /* must flush early everything before trigger word */ 993 qib_flush_wc(); 994 __raw_writel(addr[dwords - 1], piobuf + dwords - 1); 995 /* be sure trigger word is written */ 996 qib_flush_wc(); 997 } else 998 qib_pio_copy(piobuf, addr, dwords); 999 goto done; 1000 } 1001 copy_io(piobuf, ss, len, flush_wc); 1002 done: 1003 if (dd->flags & QIB_USE_SPCL_TRIG) { 1004 u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; 1005 1006 qib_flush_wc(); 1007 __raw_writel(0xaebecede, piobuf_orig + spcl_off); 1008 } 1009 qib_sendbuf_done(dd, pbufn); 1010 if (qp->s_rdma_mr) { 1011 rvt_put_mr(qp->s_rdma_mr); 1012 qp->s_rdma_mr = NULL; 1013 } 1014 if (qp->s_wqe) { 1015 spin_lock_irqsave(&qp->s_lock, flags); 1016 qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1017 spin_unlock_irqrestore(&qp->s_lock, flags); 1018 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 1019 spin_lock_irqsave(&qp->s_lock, flags); 1020 qib_rc_send_complete(qp, ibhdr); 1021 spin_unlock_irqrestore(&qp->s_lock, flags); 1022 } 1023 return 0; 1024 } 1025 1026 /** 1027 * qib_verbs_send - send a packet 1028 * @qp: the QP to send on 1029 * @hdr: the packet header 1030 * @hdrwords: the number of 32-bit words in the header 1031 * @ss: the SGE to send 1032 * @len: the length of the packet in bytes 1033 * 1034 * Return zero if packet is sent or queued OK. 1035 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. 1036 */ 1037 int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, 1038 u32 hdrwords, struct rvt_sge_state *ss, u32 len) 1039 { 1040 struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); 1041 u32 plen; 1042 int ret; 1043 u32 dwords = (len + 3) >> 2; 1044 1045 /* 1046 * Calculate the send buffer trigger address. 1047 * The +1 counts for the pbc control dword following the pbc length. 1048 */ 1049 plen = hdrwords + dwords + 1; 1050 1051 /* 1052 * VL15 packets (IB_QPT_SMI) will always use PIO, so we 1053 * can defer SDMA restart until link goes ACTIVE without 1054 * worrying about just how we got there. 1055 */ 1056 if (qp->ibqp.qp_type == IB_QPT_SMI || 1057 !(dd->flags & QIB_HAS_SEND_DMA)) 1058 ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1059 plen, dwords); 1060 else 1061 ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len, 1062 plen, dwords); 1063 1064 return ret; 1065 } 1066 1067 int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, 1068 u64 *rwords, u64 *spkts, u64 *rpkts, 1069 u64 *xmit_wait) 1070 { 1071 int ret; 1072 struct qib_devdata *dd = ppd->dd; 1073 1074 if (!(dd->flags & QIB_PRESENT)) { 1075 /* no hardware, freeze, etc. */ 1076 ret = -EINVAL; 1077 goto bail; 1078 } 1079 *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND); 1080 *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV); 1081 *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND); 1082 *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV); 1083 *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL); 1084 1085 ret = 0; 1086 1087 bail: 1088 return ret; 1089 } 1090 1091 /** 1092 * qib_get_counters - get various chip counters 1093 * @dd: the qlogic_ib device 1094 * @cntrs: counters are placed here 1095 * 1096 * Return the counters needed by recv_pma_get_portcounters(). 1097 */ 1098 int qib_get_counters(struct qib_pportdata *ppd, 1099 struct qib_verbs_counters *cntrs) 1100 { 1101 int ret; 1102 1103 if (!(ppd->dd->flags & QIB_PRESENT)) { 1104 /* no hardware, freeze, etc. */ 1105 ret = -EINVAL; 1106 goto bail; 1107 } 1108 cntrs->symbol_error_counter = 1109 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR); 1110 cntrs->link_error_recovery_counter = 1111 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV); 1112 /* 1113 * The link downed counter counts when the other side downs the 1114 * connection. We add in the number of times we downed the link 1115 * due to local link integrity errors to compensate. 1116 */ 1117 cntrs->link_downed_counter = 1118 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN); 1119 cntrs->port_rcv_errors = 1120 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) + 1121 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) + 1122 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) + 1123 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) + 1124 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) + 1125 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) + 1126 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) + 1127 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) + 1128 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT); 1129 cntrs->port_rcv_errors += 1130 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR); 1131 cntrs->port_rcv_errors += 1132 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR); 1133 cntrs->port_rcv_remphys_errors = 1134 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP); 1135 cntrs->port_xmit_discards = 1136 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL); 1137 cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd, 1138 QIBPORTCNTR_WORDSEND); 1139 cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd, 1140 QIBPORTCNTR_WORDRCV); 1141 cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd, 1142 QIBPORTCNTR_PKTSEND); 1143 cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd, 1144 QIBPORTCNTR_PKTRCV); 1145 cntrs->local_link_integrity_errors = 1146 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI); 1147 cntrs->excessive_buffer_overrun_errors = 1148 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL); 1149 cntrs->vl15_dropped = 1150 ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP); 1151 1152 ret = 0; 1153 1154 bail: 1155 return ret; 1156 } 1157 1158 /** 1159 * qib_ib_piobufavail - callback when a PIO buffer is available 1160 * @dd: the device pointer 1161 * 1162 * This is called from qib_intr() at interrupt level when a PIO buffer is 1163 * available after qib_verbs_send() returned an error that no buffers were 1164 * available. Disable the interrupt if there are no more QPs waiting. 1165 */ 1166 void qib_ib_piobufavail(struct qib_devdata *dd) 1167 { 1168 struct qib_ibdev *dev = &dd->verbs_dev; 1169 struct list_head *list; 1170 struct rvt_qp *qps[5]; 1171 struct rvt_qp *qp; 1172 unsigned long flags; 1173 unsigned i, n; 1174 struct qib_qp_priv *priv; 1175 1176 list = &dev->piowait; 1177 n = 0; 1178 1179 /* 1180 * Note: checking that the piowait list is empty and clearing 1181 * the buffer available interrupt needs to be atomic or we 1182 * could end up with QPs on the wait list with the interrupt 1183 * disabled. 1184 */ 1185 spin_lock_irqsave(&dev->rdi.pending_lock, flags); 1186 while (!list_empty(list)) { 1187 if (n == ARRAY_SIZE(qps)) 1188 goto full; 1189 priv = list_entry(list->next, struct qib_qp_priv, iowait); 1190 qp = priv->owner; 1191 list_del_init(&priv->iowait); 1192 rvt_get_qp(qp); 1193 qps[n++] = qp; 1194 } 1195 dd->f_wantpiobuf_intr(dd, 0); 1196 full: 1197 spin_unlock_irqrestore(&dev->rdi.pending_lock, flags); 1198 1199 for (i = 0; i < n; i++) { 1200 qp = qps[i]; 1201 1202 spin_lock_irqsave(&qp->s_lock, flags); 1203 if (qp->s_flags & RVT_S_WAIT_PIO) { 1204 qp->s_flags &= ~RVT_S_WAIT_PIO; 1205 qib_schedule_send(qp); 1206 } 1207 spin_unlock_irqrestore(&qp->s_lock, flags); 1208 1209 /* Notify qib_destroy_qp() if it is waiting. */ 1210 rvt_put_qp(qp); 1211 } 1212 } 1213 1214 static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num, 1215 struct ib_port_attr *props) 1216 { 1217 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); 1218 struct qib_devdata *dd = dd_from_dev(ibdev); 1219 struct qib_pportdata *ppd = &dd->pport[port_num - 1]; 1220 enum ib_mtu mtu; 1221 u16 lid = ppd->lid; 1222 1223 /* props being zeroed by the caller, avoid zeroing it here */ 1224 props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); 1225 props->lmc = ppd->lmc; 1226 props->state = dd->f_iblink_state(ppd->lastibcstat); 1227 props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat); 1228 props->gid_tbl_len = QIB_GUIDS_PER_PORT; 1229 props->active_width = ppd->link_width_active; 1230 /* See rate_show() */ 1231 props->active_speed = ppd->link_speed_active; 1232 props->max_vl_num = qib_num_vls(ppd->vls_supported); 1233 1234 props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096; 1235 switch (ppd->ibmtu) { 1236 case 4096: 1237 mtu = IB_MTU_4096; 1238 break; 1239 case 2048: 1240 mtu = IB_MTU_2048; 1241 break; 1242 case 1024: 1243 mtu = IB_MTU_1024; 1244 break; 1245 case 512: 1246 mtu = IB_MTU_512; 1247 break; 1248 case 256: 1249 mtu = IB_MTU_256; 1250 break; 1251 default: 1252 mtu = IB_MTU_2048; 1253 } 1254 props->active_mtu = mtu; 1255 1256 return 0; 1257 } 1258 1259 static int qib_modify_device(struct ib_device *device, 1260 int device_modify_mask, 1261 struct ib_device_modify *device_modify) 1262 { 1263 struct qib_devdata *dd = dd_from_ibdev(device); 1264 unsigned i; 1265 int ret; 1266 1267 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | 1268 IB_DEVICE_MODIFY_NODE_DESC)) { 1269 ret = -EOPNOTSUPP; 1270 goto bail; 1271 } 1272 1273 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { 1274 memcpy(device->node_desc, device_modify->node_desc, 1275 IB_DEVICE_NODE_DESC_MAX); 1276 for (i = 0; i < dd->num_pports; i++) { 1277 struct qib_ibport *ibp = &dd->pport[i].ibport_data; 1278 1279 qib_node_desc_chg(ibp); 1280 } 1281 } 1282 1283 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { 1284 ib_qib_sys_image_guid = 1285 cpu_to_be64(device_modify->sys_image_guid); 1286 for (i = 0; i < dd->num_pports; i++) { 1287 struct qib_ibport *ibp = &dd->pport[i].ibport_data; 1288 1289 qib_sys_guid_chg(ibp); 1290 } 1291 } 1292 1293 ret = 0; 1294 1295 bail: 1296 return ret; 1297 } 1298 1299 static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num) 1300 { 1301 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); 1302 struct qib_devdata *dd = dd_from_dev(ibdev); 1303 struct qib_pportdata *ppd = &dd->pport[port_num - 1]; 1304 1305 qib_set_linkstate(ppd, QIB_IB_LINKDOWN); 1306 1307 return 0; 1308 } 1309 1310 static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, 1311 int guid_index, __be64 *guid) 1312 { 1313 struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp); 1314 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 1315 1316 if (guid_index == 0) 1317 *guid = ppd->guid; 1318 else if (guid_index < QIB_GUIDS_PER_PORT) 1319 *guid = ibp->guids[guid_index - 1]; 1320 else 1321 return -EINVAL; 1322 1323 return 0; 1324 } 1325 1326 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) 1327 { 1328 if (ah_attr->sl > 15) 1329 return -EINVAL; 1330 1331 return 0; 1332 } 1333 1334 static void qib_notify_new_ah(struct ib_device *ibdev, 1335 struct ib_ah_attr *ah_attr, 1336 struct rvt_ah *ah) 1337 { 1338 struct qib_ibport *ibp; 1339 struct qib_pportdata *ppd; 1340 1341 /* 1342 * Do not trust reading anything from rvt_ah at this point as it is not 1343 * done being setup. We can however modify things which we need to set. 1344 */ 1345 1346 ibp = to_iport(ibdev, ah_attr->port_num); 1347 ppd = ppd_from_ibp(ibp); 1348 ah->vl = ibp->sl_to_vl[ah->attr.sl]; 1349 ah->log_pmtu = ilog2(ppd->ibmtu); 1350 } 1351 1352 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) 1353 { 1354 struct ib_ah_attr attr; 1355 struct ib_ah *ah = ERR_PTR(-EINVAL); 1356 struct rvt_qp *qp0; 1357 1358 memset(&attr, 0, sizeof(attr)); 1359 attr.dlid = dlid; 1360 attr.port_num = ppd_from_ibp(ibp)->port; 1361 rcu_read_lock(); 1362 qp0 = rcu_dereference(ibp->rvp.qp[0]); 1363 if (qp0) 1364 ah = ib_create_ah(qp0->ibqp.pd, &attr); 1365 rcu_read_unlock(); 1366 return ah; 1367 } 1368 1369 /** 1370 * qib_get_npkeys - return the size of the PKEY table for context 0 1371 * @dd: the qlogic_ib device 1372 */ 1373 unsigned qib_get_npkeys(struct qib_devdata *dd) 1374 { 1375 return ARRAY_SIZE(dd->rcd[0]->pkeys); 1376 } 1377 1378 /* 1379 * Return the indexed PKEY from the port PKEY table. 1380 * No need to validate rcd[ctxt]; the port is setup if we are here. 1381 */ 1382 unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index) 1383 { 1384 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 1385 struct qib_devdata *dd = ppd->dd; 1386 unsigned ctxt = ppd->hw_pidx; 1387 unsigned ret; 1388 1389 /* dd->rcd null if mini_init or some init failures */ 1390 if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys)) 1391 ret = 0; 1392 else 1393 ret = dd->rcd[ctxt]->pkeys[index]; 1394 1395 return ret; 1396 } 1397 1398 static void init_ibport(struct qib_pportdata *ppd) 1399 { 1400 struct qib_verbs_counters cntrs; 1401 struct qib_ibport *ibp = &ppd->ibport_data; 1402 1403 spin_lock_init(&ibp->rvp.lock); 1404 /* Set the prefix to the default value (see ch. 4.1.1) */ 1405 ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; 1406 ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); 1407 ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | 1408 IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP | 1409 IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP | 1410 IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP | 1411 IB_PORT_OTHER_LOCAL_CHANGES_SUP; 1412 if (ppd->dd->flags & QIB_HAS_LINK_LATENCY) 1413 ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; 1414 ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; 1415 ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; 1416 ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; 1417 ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; 1418 ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; 1419 1420 /* Snapshot current HW counters to "clear" them. */ 1421 qib_get_counters(ppd, &cntrs); 1422 ibp->z_symbol_error_counter = cntrs.symbol_error_counter; 1423 ibp->z_link_error_recovery_counter = 1424 cntrs.link_error_recovery_counter; 1425 ibp->z_link_downed_counter = cntrs.link_downed_counter; 1426 ibp->z_port_rcv_errors = cntrs.port_rcv_errors; 1427 ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors; 1428 ibp->z_port_xmit_discards = cntrs.port_xmit_discards; 1429 ibp->z_port_xmit_data = cntrs.port_xmit_data; 1430 ibp->z_port_rcv_data = cntrs.port_rcv_data; 1431 ibp->z_port_xmit_packets = cntrs.port_xmit_packets; 1432 ibp->z_port_rcv_packets = cntrs.port_rcv_packets; 1433 ibp->z_local_link_integrity_errors = 1434 cntrs.local_link_integrity_errors; 1435 ibp->z_excessive_buffer_overrun_errors = 1436 cntrs.excessive_buffer_overrun_errors; 1437 ibp->z_vl15_dropped = cntrs.vl15_dropped; 1438 RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); 1439 RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); 1440 } 1441 1442 /** 1443 * qib_fill_device_attr - Fill in rvt dev info device attributes. 1444 * @dd: the device data structure 1445 */ 1446 static void qib_fill_device_attr(struct qib_devdata *dd) 1447 { 1448 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 1449 1450 memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); 1451 1452 rdi->dparms.props.max_pd = ib_qib_max_pds; 1453 rdi->dparms.props.max_ah = ib_qib_max_ahs; 1454 rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | 1455 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1456 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1457 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; 1458 rdi->dparms.props.page_size_cap = PAGE_SIZE; 1459 rdi->dparms.props.vendor_id = 1460 QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3; 1461 rdi->dparms.props.vendor_part_id = dd->deviceid; 1462 rdi->dparms.props.hw_ver = dd->minrev; 1463 rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid; 1464 rdi->dparms.props.max_mr_size = ~0ULL; 1465 rdi->dparms.props.max_qp = ib_qib_max_qps; 1466 rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; 1467 rdi->dparms.props.max_sge = ib_qib_max_sges; 1468 rdi->dparms.props.max_sge_rd = ib_qib_max_sges; 1469 rdi->dparms.props.max_cq = ib_qib_max_cqs; 1470 rdi->dparms.props.max_cqe = ib_qib_max_cqes; 1471 rdi->dparms.props.max_ah = ib_qib_max_ahs; 1472 rdi->dparms.props.max_mr = rdi->lkey_table.max; 1473 rdi->dparms.props.max_fmr = rdi->lkey_table.max; 1474 rdi->dparms.props.max_map_per_fmr = 32767; 1475 rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; 1476 rdi->dparms.props.max_qp_init_rd_atom = 255; 1477 rdi->dparms.props.max_srq = ib_qib_max_srqs; 1478 rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs; 1479 rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges; 1480 rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB; 1481 rdi->dparms.props.max_pkeys = qib_get_npkeys(dd); 1482 rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps; 1483 rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached; 1484 rdi->dparms.props.max_total_mcast_qp_attach = 1485 rdi->dparms.props.max_mcast_qp_attach * 1486 rdi->dparms.props.max_mcast_grp; 1487 /* post send table */ 1488 dd->verbs_dev.rdi.post_parms = qib_post_parms; 1489 } 1490 1491 /** 1492 * qib_register_ib_device - register our device with the infiniband core 1493 * @dd: the device data structure 1494 * Return the allocated qib_ibdev pointer or NULL on error. 1495 */ 1496 int qib_register_ib_device(struct qib_devdata *dd) 1497 { 1498 struct qib_ibdev *dev = &dd->verbs_dev; 1499 struct ib_device *ibdev = &dev->rdi.ibdev; 1500 struct qib_pportdata *ppd = dd->pport; 1501 unsigned i, ctxt; 1502 int ret; 1503 1504 get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); 1505 for (i = 0; i < dd->num_pports; i++) 1506 init_ibport(ppd + i); 1507 1508 /* Only need to initialize non-zero fields. */ 1509 setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); 1510 1511 INIT_LIST_HEAD(&dev->piowait); 1512 INIT_LIST_HEAD(&dev->dmawait); 1513 INIT_LIST_HEAD(&dev->txwait); 1514 INIT_LIST_HEAD(&dev->memwait); 1515 INIT_LIST_HEAD(&dev->txreq_free); 1516 1517 if (ppd->sdma_descq_cnt) { 1518 dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev, 1519 ppd->sdma_descq_cnt * 1520 sizeof(struct qib_pio_header), 1521 &dev->pio_hdrs_phys, 1522 GFP_KERNEL); 1523 if (!dev->pio_hdrs) { 1524 ret = -ENOMEM; 1525 goto err_hdrs; 1526 } 1527 } 1528 1529 for (i = 0; i < ppd->sdma_descq_cnt; i++) { 1530 struct qib_verbs_txreq *tx; 1531 1532 tx = kzalloc(sizeof(*tx), GFP_KERNEL); 1533 if (!tx) { 1534 ret = -ENOMEM; 1535 goto err_tx; 1536 } 1537 tx->hdr_inx = i; 1538 list_add(&tx->txreq.list, &dev->txreq_free); 1539 } 1540 1541 /* 1542 * The system image GUID is supposed to be the same for all 1543 * IB HCAs in a single system but since there can be other 1544 * device types in the system, we can't be sure this is unique. 1545 */ 1546 if (!ib_qib_sys_image_guid) 1547 ib_qib_sys_image_guid = ppd->guid; 1548 1549 strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX); 1550 ibdev->owner = THIS_MODULE; 1551 ibdev->node_guid = ppd->guid; 1552 ibdev->phys_port_cnt = dd->num_pports; 1553 ibdev->dev.parent = &dd->pcidev->dev; 1554 ibdev->modify_device = qib_modify_device; 1555 ibdev->process_mad = qib_process_mad; 1556 1557 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), 1558 "Intel Infiniband HCA %s", init_utsname()->nodename); 1559 1560 /* 1561 * Fill in rvt info object. 1562 */ 1563 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; 1564 dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; 1565 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; 1566 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; 1567 dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; 1568 dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; 1569 dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; 1570 dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; 1571 dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free; 1572 dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps; 1573 dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset; 1574 dd->verbs_dev.rdi.driver_f.do_send = qib_do_send; 1575 dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send; 1576 dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp; 1577 dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue; 1578 dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters; 1579 dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp; 1580 dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc; 1581 dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; 1582 dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; 1583 dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; 1584 dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send; 1585 dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; 1586 dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; 1587 dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; 1588 dd->verbs_dev.rdi.driver_f.notify_create_mad_agent = 1589 qib_notify_create_mad_agent; 1590 dd->verbs_dev.rdi.driver_f.notify_free_mad_agent = 1591 qib_notify_free_mad_agent; 1592 1593 dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC; 1594 dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be; 1595 dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size; 1596 dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size; 1597 dd->verbs_dev.rdi.dparms.qpn_start = 1; 1598 dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP; 1599 dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */ 1600 dd->verbs_dev.rdi.dparms.qpn_inc = 1; 1601 dd->verbs_dev.rdi.dparms.qos_shift = 1; 1602 dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK; 1603 dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT; 1604 dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK; 1605 dd->verbs_dev.rdi.dparms.nports = dd->num_pports; 1606 dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd); 1607 dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; 1608 dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; 1609 dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; 1610 1611 snprintf(dd->verbs_dev.rdi.dparms.cq_name, 1612 sizeof(dd->verbs_dev.rdi.dparms.cq_name), 1613 "qib_cq%d", dd->unit); 1614 1615 qib_fill_device_attr(dd); 1616 1617 ppd = dd->pport; 1618 for (i = 0; i < dd->num_pports; i++, ppd++) { 1619 ctxt = ppd->hw_pidx; 1620 rvt_init_port(&dd->verbs_dev.rdi, 1621 &ppd->ibport_data.rvp, 1622 i, 1623 dd->rcd[ctxt]->pkeys); 1624 } 1625 1626 ret = rvt_register_device(&dd->verbs_dev.rdi); 1627 if (ret) 1628 goto err_tx; 1629 1630 ret = qib_verbs_register_sysfs(dd); 1631 if (ret) 1632 goto err_class; 1633 1634 return ret; 1635 1636 err_class: 1637 rvt_unregister_device(&dd->verbs_dev.rdi); 1638 err_tx: 1639 while (!list_empty(&dev->txreq_free)) { 1640 struct list_head *l = dev->txreq_free.next; 1641 struct qib_verbs_txreq *tx; 1642 1643 list_del(l); 1644 tx = list_entry(l, struct qib_verbs_txreq, txreq.list); 1645 kfree(tx); 1646 } 1647 if (ppd->sdma_descq_cnt) 1648 dma_free_coherent(&dd->pcidev->dev, 1649 ppd->sdma_descq_cnt * 1650 sizeof(struct qib_pio_header), 1651 dev->pio_hdrs, dev->pio_hdrs_phys); 1652 err_hdrs: 1653 qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); 1654 return ret; 1655 } 1656 1657 void qib_unregister_ib_device(struct qib_devdata *dd) 1658 { 1659 struct qib_ibdev *dev = &dd->verbs_dev; 1660 1661 qib_verbs_unregister_sysfs(dd); 1662 1663 rvt_unregister_device(&dd->verbs_dev.rdi); 1664 1665 if (!list_empty(&dev->piowait)) 1666 qib_dev_err(dd, "piowait list not empty!\n"); 1667 if (!list_empty(&dev->dmawait)) 1668 qib_dev_err(dd, "dmawait list not empty!\n"); 1669 if (!list_empty(&dev->txwait)) 1670 qib_dev_err(dd, "txwait list not empty!\n"); 1671 if (!list_empty(&dev->memwait)) 1672 qib_dev_err(dd, "memwait list not empty!\n"); 1673 1674 del_timer_sync(&dev->mem_timer); 1675 while (!list_empty(&dev->txreq_free)) { 1676 struct list_head *l = dev->txreq_free.next; 1677 struct qib_verbs_txreq *tx; 1678 1679 list_del(l); 1680 tx = list_entry(l, struct qib_verbs_txreq, txreq.list); 1681 kfree(tx); 1682 } 1683 if (dd->pport->sdma_descq_cnt) 1684 dma_free_coherent(&dd->pcidev->dev, 1685 dd->pport->sdma_descq_cnt * 1686 sizeof(struct qib_pio_header), 1687 dev->pio_hdrs, dev->pio_hdrs_phys); 1688 } 1689 1690 /** 1691 * _qib_schedule_send - schedule progress 1692 * @qp - the qp 1693 * 1694 * This schedules progress w/o regard to the s_flags. 1695 * 1696 * It is only used in post send, which doesn't hold 1697 * the s_lock. 1698 */ 1699 void _qib_schedule_send(struct rvt_qp *qp) 1700 { 1701 struct qib_ibport *ibp = 1702 to_iport(qp->ibqp.device, qp->port_num); 1703 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 1704 struct qib_qp_priv *priv = qp->priv; 1705 1706 queue_work(ppd->qib_wq, &priv->s_work); 1707 } 1708 1709 /** 1710 * qib_schedule_send - schedule progress 1711 * @qp - the qp 1712 * 1713 * This schedules qp progress. The s_lock 1714 * should be held. 1715 */ 1716 void qib_schedule_send(struct rvt_qp *qp) 1717 { 1718 if (qib_send_ok(qp)) 1719 _qib_schedule_send(qp); 1720 } 1721