/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/idr.h>

#include "qib.h"
#include "qib_common.h"

/*
 * min buffers we want to have per context, after driver
 */
#define QIB_MIN_USER_CTXT_BUFCNT 7

#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF
#define QLOGIC_IB_R_SOFTWARE_SHIFT 24
#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)

/*
 * Number of ctxts we are configured to use (to allow for more pio
 * buffers per ctxt, etc.)  Zero means use chip value.
 */
ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");

/*
 * If set, do not write to any regs if avoidable, hack to allow
 * check for deranged default register values.
 */
ushort qib_mini_init;
module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");

unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");

/*
 * qib_wc_pat parameter:
 *      0 is WC via MTRR
 *      1 is WC via PAT
 *      If PAT initialization fails, code reverts back to MTRR
 */
unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");

struct workqueue_struct *qib_cq_wq;

static void verify_interrupt(unsigned long);

static struct idr qib_unit_table;
u32 qib_cpulist_count;
unsigned long *qib_cpulist;

/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
	if (!qib_cfgctxts) {
		dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
		if (dd->cfgctxts > dd->ctxtcnt)
			dd->cfgctxts = dd->ctxtcnt;
	} else if (qib_cfgctxts < dd->num_pports)
		dd->cfgctxts = dd->ctxtcnt;
	else if (qib_cfgctxts <= dd->ctxtcnt)
		dd->cfgctxts = qib_cfgctxts;
	else
		dd->cfgctxts = dd->ctxtcnt;
}

/*
 * Common code for creating the receive context array.
 */
int qib_create_ctxts(struct qib_devdata *dd)
{
	unsigned i;
	int ret;

	/*
	 * Allocate full ctxtcnt array, rather than just cfgctxts, because
	 * cleanup iterates across all possible ctxts.
	 */
	dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
	if (!dd->rcd) {
		qib_dev_err(dd, "Unable to allocate ctxtdata array, "
			    "failing\n");
		ret = -ENOMEM;
		goto done;
	}

	/* create (one or more) kctxt */
	for (i = 0; i < dd->first_user_ctxt; ++i) {
		struct qib_pportdata *ppd;
		struct qib_ctxtdata *rcd;

		if (dd->skip_kctxt_mask & (1 << i))
			continue;

		ppd = dd->pport + (i % dd->num_pports);
		rcd = qib_create_ctxtdata(ppd, i);
		if (!rcd) {
			qib_dev_err(dd, "Unable to allocate ctxtdata"
				    " for Kernel ctxt, failing\n");
			ret = -ENOMEM;
			goto done;
		}
		rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
		rcd->seq_cnt = 1;
	}
	ret = 0;
done:
	return ret;
}

/*
 * Common code for user and kernel context setup.
 */
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
{
	struct qib_devdata *dd = ppd->dd;
	struct qib_ctxtdata *rcd;

	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
	if (rcd) {
		INIT_LIST_HEAD(&rcd->qp_wait_list);
		rcd->ppd = ppd;
		rcd->dd = dd;
		rcd->cnt = 1;
		rcd->ctxt = ctxt;
		dd->rcd[ctxt] = rcd;

		dd->f_init_ctxt(rcd);

		/*
		 * To avoid wasting a lot of memory, we allocate 32KB chunks
		 * of physically contiguous memory, advance through it until
		 * used up and then allocate more.  Of course, we need
		 * memory to store those extra pointers, now.  32KB seems to
		 * be the most that is "safe" under memory pressure
		 * (creating large files and then copying them over
		 * NFS while doing lots of MPI jobs).  The OOM killer can
		 * get invoked, even though we say we can sleep and this can
		 * cause significant system problems....
		 */
		rcd->rcvegrbuf_size = 0x8000;
		rcd->rcvegrbufs_perchunk =
			rcd->rcvegrbuf_size / dd->rcvegrbufsize;
		rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
			rcd->rcvegrbufs_perchunk - 1) /
			rcd->rcvegrbufs_perchunk;
	}
	return rcd;
}

/*
 * Common code for initializing the physical port structure.
 */
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
			u8 hw_pidx, u8 port)
{
	ppd->dd = dd;
	ppd->hw_pidx = hw_pidx;
	ppd->port = port; /* IB port number, not index */

	spin_lock_init(&ppd->sdma_lock);
	spin_lock_init(&ppd->lflags_lock);
	init_waitqueue_head(&ppd->state_wait);

	init_timer(&ppd->symerr_clear_timer);
	ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
	ppd->symerr_clear_timer.data = (unsigned long)ppd;
}

static int init_pioavailregs(struct qib_devdata *dd)
{
	int ret, pidx;
	u64 *status_page;

	dd->pioavailregs_dma = dma_alloc_coherent(
		&dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
		GFP_KERNEL);
	if (!dd->pioavailregs_dma) {
		qib_dev_err(dd, "failed to allocate PIOavail reg area "
			    "in memory\n");
		ret = -ENOMEM;
		goto done;
	}

	/*
	 * We really want L2 cache aligned, but for current CPUs of
	 * interest, they are the same.
	 */
	status_page = (u64 *)
		((char *) dd->pioavailregs_dma +
		 ((2 * L1_CACHE_BYTES +
		   dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
	/* device status comes first, for backwards compatibility */
	dd->devstatusp = status_page;
	*status_page++ = 0;
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		dd->pport[pidx].statusp = status_page;
		*status_page++ = 0;
	}

	/*
	 * Setup buffer to hold freeze and other messages, accessible to
	 * apps, following statusp.  This is per-unit, not per port.
	 */
	dd->freezemsg = (char *) status_page;
	*dd->freezemsg = 0;
	/* length of msg buffer is "whatever is left" */
	ret = (char *) status_page - (char *) dd->pioavailregs_dma;
	dd->freezelen = PAGE_SIZE - ret;

	ret = 0;

done:
	return ret;
}

/**
 * init_shadow_tids - allocate the shadow TID array
 * @dd: the qlogic_ib device
 *
 * allocate the shadow TID array, so we can qib_munlock previous
 * entries.  It may make more sense to move the pageshadow to the
 * ctxt data structure, so we only allocate memory for ctxts actually
 * in use, since we are at 8k per ctxt, now.
 * We don't want failures here to prevent use of the driver/chip,
 * so no return value.
 */
static void init_shadow_tids(struct qib_devdata *dd)
{
	struct page **pages;
	dma_addr_t *addrs;

	pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
	if (!pages) {
		qib_dev_err(dd, "failed to allocate shadow page * "
			    "array, no expected sends!\n");
		goto bail;
	}

	addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
	if (!addrs) {
		qib_dev_err(dd, "failed to allocate shadow dma handle "
			    "array, no expected sends!\n");
		goto bail_free;
	}

	dd->pageshadow = pages;
	dd->physshadow = addrs;
	return;

bail_free:
	vfree(pages);
bail:
	dd->pageshadow = NULL;
}

/*
 * Do initialization for device that is only needed on
 * first detect, not on resets.
 */
static int loadtime_init(struct qib_devdata *dd)
{
	int ret = 0;

	if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
	     QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
		qib_dev_err(dd, "Driver only handles version %d, "
			    "chip swversion is %d (%llx), failing\n",
			    QIB_CHIP_SWVERSION,
			    (int)(dd->revision >>
				  QLOGIC_IB_R_SOFTWARE_SHIFT) &
			    QLOGIC_IB_R_SOFTWARE_MASK,
			    (unsigned long long) dd->revision);
		ret = -ENOSYS;
		goto done;
	}

	if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK)
		qib_devinfo(dd->pcidev, "%s", dd->boardversion);

	spin_lock_init(&dd->pioavail_lock);
	spin_lock_init(&dd->sendctrl_lock);
	spin_lock_init(&dd->uctxt_lock);
	spin_lock_init(&dd->qib_diag_trans_lock);
	spin_lock_init(&dd->eep_st_lock);
	mutex_init(&dd->eep_lock);

	if (qib_mini_init)
		goto done;

	ret = init_pioavailregs(dd);
	init_shadow_tids(dd);

	qib_get_eeprom_info(dd);

	/* setup time (don't start yet) to verify we got interrupt */
	init_timer(&dd->intrchk_timer);
	dd->intrchk_timer.function = verify_interrupt;
	dd->intrchk_timer.data = (unsigned long) dd;

done:
	return ret;
}

/**
 * init_after_reset - re-initialize after a reset
 * @dd: the qlogic_ib device
 *
 * sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed)
 */
static int init_after_reset(struct qib_devdata *dd)
{
	int i;

	/*
	 * Ensure chip does no sends or receives, tail updates, or
	 * pioavail updates while we re-initialize.  This is mostly
	 * for the driver data structures, not chip registers.
	 */
	for (i = 0; i < dd->num_pports; ++i) {
		/*
		 * ctxt == -1 means "all contexts". Only really safe for
		 * _dis_abling things, as here.
		 */
		dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_TAILUPD_DIS, -1);
		/* Redundant across ports for some, but no big deal.  */
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS |
			       QIB_SENDCTRL_AVAIL_DIS);
	}

	return 0;
}

static void enable_chip(struct qib_devdata *dd)
{
	u64 rcvmask;
	int i;

	/*
	 * Enable PIO send, and update of PIOavail regs to memory.
	 */
	for (i = 0; i < dd->num_pports; ++i)
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB |
			       QIB_SENDCTRL_AVAIL_ENB);
	/*
	 * Enable kernel ctxts' receive and receive interrupt.
	 * Other ctxts done as user opens and inits them.
	 */
	rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB;
	rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ?
		   QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB;
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		struct qib_ctxtdata *rcd = dd->rcd[i];

		if (rcd)
			dd->f_rcvctrl(rcd->ppd, rcvmask, i);
	}
}

static void verify_interrupt(unsigned long opaque)
{
	struct qib_devdata *dd = (struct qib_devdata *) opaque;

	if (!dd)
		return; /* being torn down */

	/*
	 * If we don't have a lid or any interrupts, let the user know and
	 * don't bother checking again.
	 */
	if (dd->int_counter == 0) {
		if (!dd->f_intr_fallback(dd))
			dev_err(&dd->pcidev->dev, "No interrupts detected, "
				"not usable.\n");
		else /* re-arm the timer to see if fallback works */
			mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
	}
}

static void init_piobuf_state(struct qib_devdata *dd)
{
	int i, pidx;
	u32 uctxts;

	/*
	 * Ensure all buffers are free, and fifos empty.  Buffers
	 * are common, so only do once for port 0.
	 *
	 * After enable and qib_chg_pioavailkernel so we can safely
	 * enable pioavail updates and PIOENABLE.  After this, packets
	 * are ready and able to go out.
	 */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL);
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH);

	/*
	 * If not all sendbufs are used, add the one to each of the lower
	 * numbered contexts.  pbufsctxt and lastctxt_piobuf are
	 * calculated in chip-specific code because it may cause some
	 * chip-specific adjustments to be made.
	 */
	uctxts = dd->cfgctxts - dd->first_user_ctxt;
	dd->ctxts_extrabuf = dd->pbufsctxt ?
		dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0;

	/*
	 * Set up the shadow copies of the piobufavail registers,
	 * which we compare against the chip registers for now, and
	 * the in memory DMA'ed copies of the registers.
	 * By now pioavail updates to memory should have occurred, so
	 * copy them into our working/shadow registers; this is in
	 * case something went wrong with abort, but mostly to get the
	 * initial values of the generation bit correct.
	 */
	for (i = 0; i < dd->pioavregs; i++) {
		__le64 tmp;

		tmp = dd->pioavailregs_dma[i];
		/*
		 * Don't need to worry about pioavailkernel here
		 * because we will call qib_chg_pioavailkernel() later
		 * in initialization, to busy out buffers as needed.
		 */
		dd->pioavailshadow[i] = le64_to_cpu(tmp);
	}
	while (i < ARRAY_SIZE(dd->pioavailshadow))
		dd->pioavailshadow[i++] = 0; /* for debugging sanity */

	/* after pioavailshadow is setup */
	qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k,
			       TXCHK_CHG_TYPE_KERN, NULL);
	dd->f_initvl15_bufs(dd);
}

/**
 * qib_init - do the actual initialization sequence on the chip
 * @dd: the qlogic_ib device
 * @reinit: reinitializing, so don't allocate new memory
 *
 * Do the actual initialization sequence on the chip.  This is done
 * both from the init routine called from the PCI infrastructure, and
 * when we reset the chip, or detect that it was reset internally,
 * or it's administratively re-enabled.
 *
 * Memory allocation here and in called routines is only done in
 * the first case (reinit == 0).  We have to be careful, because even
 * without memory allocation, we need to re-write all the chip registers
 * TIDs, etc. after the reset or enable has completed.
 */
int qib_init(struct qib_devdata *dd, int reinit)
{
	int ret = 0, pidx, lastfail = 0;
	u32 portok = 0;
	unsigned i;
	struct qib_ctxtdata *rcd;
	struct qib_pportdata *ppd;
	unsigned long flags;

	/* Set linkstate to unknown, so we can watch for a transition. */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED |
				 QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKV);
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
	}

	if (reinit)
		ret = init_after_reset(dd);
	else
		ret = loadtime_init(dd);
	if (ret)
		goto done;

	/* Bypass most chip-init, to get to device creation */
	if (qib_mini_init)
		return 0;

	ret = dd->f_late_initreg(dd);
	if (ret)
		goto done;

	/* dd->rcd can be NULL if early init failed */
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		/*
		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
		 * re-init, the simplest way to handle this is to free
		 * existing, and re-allocate.
		 * Need to re-create rest of ctxt 0 ctxtdata as well.
		 */
		rcd = dd->rcd[i];
		if (!rcd)
			continue;

		lastfail = qib_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = qib_setup_eagerbufs(rcd);
		if (lastfail) {
			qib_dev_err(dd, "failed to allocate kernel ctxt's "
				    "rcvhdrq and/or egr bufs\n");
			continue;
		}
	}

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		int mtu;
		if (lastfail)
			ret = lastfail;
		ppd = dd->pport + pidx;
		mtu = ib_mtu_enum_to_int(qib_ibmtu);
		if (mtu == -1) {
			mtu = QIB_DEFAULT_MTU;
			qib_ibmtu = 0; /* don't leave invalid value */
		}
		/* set max we can ever have for this driver load */
		ppd->init_ibmaxlen = min(mtu > 2048 ?
					 dd->piosize4k : dd->piosize2k,
					 dd->rcvegrbufsize +
					 (dd->rcvhdrentsize << 2));
		/*
		 * Have to initialize ibmaxlen, but this will normally
		 * change immediately in qib_set_mtu().
		 */
		ppd->ibmaxlen = ppd->init_ibmaxlen;
		qib_set_mtu(ppd, mtu);

		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags |= QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);

		lastfail = dd->f_bringup_serdes(ppd);
		if (lastfail) {
			qib_devinfo(dd->pcidev,
				    "Failed to bringup IB port %u\n",
				    ppd->port);
			lastfail = -ENETDOWN;
			continue;
		}

		/* let link come up, and enable IBC */
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
		portok++;
	}

	if (!portok) {
		/* none of the ports initialized */
		if (!ret && lastfail)
			ret = lastfail;
		else if (!ret)
			ret = -ENETDOWN;
		/* but continue on, so we can debug cause */
	}

	enable_chip(dd);

	init_piobuf_state(dd);

done:
	if (!ret) {
		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			*ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
				QIB_STATUS_INITTED;
			if (!ppd->link_speed_enabled)
				continue;
			if (dd->flags & QIB_HAS_SEND_DMA)
				ret = qib_setup_sdma(ppd);
			init_timer(&ppd->hol_timer);
			ppd->hol_timer.function = qib_hol_event;
			ppd->hol_timer.data = (unsigned long)ppd;
			ppd->hol_state = QIB_HOL_UP;
		}

		/* now we can enable all interrupts from the chip */
		dd->f_set_intr_state(dd, 1);

		/*
		 * Setup to verify we get an interrupt, and fallback
		 * to an alternate if necessary and possible.
		 */
		mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
		/* start stats retrieval timer */
		mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
{
}

static inline struct qib_devdata *__qib_lookup(int unit)
{
	return idr_find(&qib_unit_table, unit);
}

struct qib_devdata *qib_lookup(int unit)
{
	struct qib_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	dd = __qib_lookup(unit);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	return dd;
}

/*
 * Stop the timers during unit shutdown, or after an error late
 * in initialization.
 */
static void qib_stop_timers(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	int pidx;

	if (dd->stats_timer.data) {
		del_timer_sync(&dd->stats_timer);
		dd->stats_timer.data = 0;
	}
	if (dd->intrchk_timer.data) {
		del_timer_sync(&dd->intrchk_timer);
		dd->intrchk_timer.data = 0;
	}
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (ppd->hol_timer.data)
			del_timer_sync(&ppd->hol_timer);
		if (ppd->led_override_timer.data) {
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}
		if (ppd->symerr_clear_timer.data)
			del_timer_sync(&ppd->symerr_clear_timer);
	}
}

/**
 * qib_shutdown_device - shut down a device
 * @dd: the qlogic_ib device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled.  It does not free any data structures.
 * Everything it does has to be setup again by qib_init(dd, 1)
 */
static void qib_shutdown_device(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	unsigned pidx;

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		spin_lock_irq(&ppd->lflags_lock);
		ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKARMED | QIBL_LINKACTIVE |
				 QIBL_LINKV);
		spin_unlock_irq(&ppd->lflags_lock);
		*ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
	}
	dd->flags &= ~QIB_INITTED;

	/* mask interrupts, but not errors */
	dd->f_set_intr_state(dd, 0);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
			      QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_PKEY_ENB, -1);
		/*
		 * Gracefully stop all sends allowing any in progress to
		 * trickle out first.
		 */
		dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
	}

	/*
	 * Enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(20);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_setextled(ppd, 0); /* make sure LEDs are off */

		if (dd->flags & QIB_HAS_SEND_DMA)
			qib_teardown_sdma(ppd);

		dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
			       QIB_SENDCTRL_SEND_DIS);
		/*
		 * Clear SerdesEnable.
		 * We can't count on interrupts since we are stopping.
		 */
		dd->f_quiet_serdes(ppd);
	}

	qib_update_eeprom_log(dd);
}

/**
 * qib_free_ctxtdata - free a context's allocated data
 * @dd: the qlogic_ib device
 * @rcd: the ctxtdata structure
 *
 * free up any allocated data for a context
 * This should not touch anything that would affect a simultaneous
 * re-allocation of context data, because it is called after qib_mutex
 * is released (and can be called from reinit as well).
 * It should never change any chip state, or global driver state.
 */
void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	if (!rcd)
		return;

	if (rcd->rcvhdrq) {
		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
				  rcd->rcvhdrq, rcd->rcvhdrq_phys);
		rcd->rcvhdrq = NULL;
		if (rcd->rcvhdrtail_kvaddr) {
			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
					  rcd->rcvhdrtail_kvaddr,
					  rcd->rcvhdrqtailaddr_phys);
			rcd->rcvhdrtail_kvaddr = NULL;
		}
	}
	if (rcd->rcvegrbuf) {
		unsigned e;

		for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
			void *base = rcd->rcvegrbuf[e];
			size_t size = rcd->rcvegrbuf_size;

			dma_free_coherent(&dd->pcidev->dev, size,
					  base, rcd->rcvegrbuf_phys[e]);
		}
		kfree(rcd->rcvegrbuf);
		rcd->rcvegrbuf = NULL;
		kfree(rcd->rcvegrbuf_phys);
		rcd->rcvegrbuf_phys = NULL;
		rcd->rcvegrbuf_chunks = 0;
	}

	kfree(rcd->tid_pg_list);
	vfree(rcd->user_event_mask);
	vfree(rcd->subctxt_uregbase);
	vfree(rcd->subctxt_rcvegrbuf);
	vfree(rcd->subctxt_rcvhdr_base);
	kfree(rcd);
}

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).  On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void qib_verify_pioperf(struct qib_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum);
	if (!piobuf) {
		qib_devinfo(dd->pcidev,
			    "No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		qib_devinfo(dd->pcidev,
			    "Couldn't get memory for checking PIO perf,"
			    " skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	dd->f_set_armlaunch(dd, 0);

	/*
	 * length 0, no dwords actually sent
	 */
	writeq(0, piobuf);
	qib_flush_wc();

	/*
	 * This is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.  Running for
	 * >= 5 msec seems to get us "close enough" to accurate values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		qib_pio_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		qib_dev_err(dd,
			    "Performance problem: bandwidth to PIO buffers is "
			    "only %u MiB/sec\n",
			    lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
	qib_sendbuf_done(dd, pbnum);
	dd->f_set_armlaunch(dd, 1);
}


void qib_free_devdata(struct qib_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	idr_remove(&qib_unit_table, dd->unit);
	list_del(&dd->list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	ib_dealloc_device(&dd->verbs_dev.ibdev);
}

/*
 * Allocate our primary per-unit data structure.  Must be done via verbs
 * allocator, because the verbs cleanup process both does cleanup and
 * free of the data structure.
 * "extra" is for chip-specific data.
 *
 * Use the idr mechanism to get a unit number for this unit.
 */
struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
	unsigned long flags;
	struct qib_devdata *dd;
	int ret;

	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&qib_devs_lock, flags);
	ret = idr_get_new(&qib_unit_table, dd, &dd->unit);
	if (ret >= 0)
		list_add(&dd->list, &qib_dev_list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	if (ret < 0) {
		qib_early_err(&pdev->dev,
			      "Could not allocate unit ID: error %d\n", -ret);
		ib_dealloc_device(&dd->verbs_dev.ibdev);
		dd = ERR_PTR(ret);
		goto bail;
	}

	if (!qib_cpulist_count) {
		u32 count = num_online_cpus();
		qib_cpulist = kzalloc(BITS_TO_LONGS(count) *
				      sizeof(long), GFP_KERNEL);
		if (qib_cpulist)
			qib_cpulist_count = count;
		else
			qib_early_err(&pdev->dev, "Could not alloc cpulist "
				      "info, cpu affinity might be wrong\n");
	}

bail:
	return dd;
}

/*
 * Called from freeze mode handlers, and from PCI error
 * reporting code.  Should be paranoid about state of
 * system and data structures.
 */
void qib_disable_after_error(struct qib_devdata *dd)
{
	if (dd->flags & QIB_INITTED) {
		u32 pidx;

		dd->flags &= ~QIB_INITTED;
		if (dd->pport)
			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
				struct qib_pportdata *ppd;

				ppd = dd->pport + pidx;
				if (dd->flags & QIB_PRESENT) {
					qib_set_linkstate(ppd,
						QIB_IB_LINKDOWN_DISABLE);
					dd->f_setextled(ppd, 0);
				}
				*ppd->statusp &= ~QIB_STATUS_IB_READY;
			}
	}

	/*
	 * Mark as having had an error for driver, and also
	 * for /sys and status word mapped to user programs.
	 * This marks unit as not usable, until reset.
	 */
	if (dd->devstatusp)
		*dd->devstatusp |= QIB_STATUS_HWERROR;
}

static void __devexit qib_remove_one(struct pci_dev *);
static int __devinit qib_init_one(struct pci_dev *,
				  const struct pci_device_id *);

#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "

static const struct pci_device_id qib_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, qib_pci_tbl);

struct pci_driver qib_driver = {
	.name = QIB_DRV_NAME,
	.probe = qib_init_one,
	.remove = __devexit_p(qib_remove_one),
	.id_table = qib_pci_tbl,
	.err_handler = &qib_pci_err_handler,
};

/*
 * Do all the generic driver unit- and chip-independent memory
 * allocation and initialization.
 */
static int __init qlogic_ib_init(void)
{
	int ret;

	ret = qib_dev_init();
	if (ret)
		goto bail;

	qib_cq_wq = create_singlethread_workqueue("qib_cq");
	if (!qib_cq_wq) {
		ret = -ENOMEM;
		goto bail_dev;
	}

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&qib_unit_table);
	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
		ret = -ENOMEM;
		goto bail_cq_wq;
	}

	ret = pci_register_driver(&qib_driver);
	if (ret < 0) {
		printk(KERN_ERR QIB_DRV_NAME
		       ": Unable to register driver: error %d\n", -ret);
		goto bail_unit;
	}

	/* not fatal if it doesn't work */
	if (qib_init_qibfs())
		printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n");
	goto bail; /* all OK */

bail_unit:
	idr_destroy(&qib_unit_table);
bail_cq_wq:
	destroy_workqueue(qib_cq_wq);
bail_dev:
	qib_dev_cleanup();
bail:
	return ret;
}

module_init(qlogic_ib_init);

/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 */
static void __exit qlogic_ib_cleanup(void)
{
	int ret;

	ret = qib_exit_qibfs();
	if (ret)
		printk(KERN_ERR QIB_DRV_NAME ": "
		       "Unable to cleanup counter filesystem: "
		       "error %d\n", -ret);

	pci_unregister_driver(&qib_driver);

	destroy_workqueue(qib_cq_wq);

	qib_cpulist_count = 0;
	kfree(qib_cpulist);

	idr_destroy(&qib_unit_table);
	qib_dev_cleanup();
}

module_exit(qlogic_ib_cleanup);

/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
{
	int ctxt;
	int pidx;
	struct qib_ctxtdata **tmp;
	unsigned long flags;

	/* users can't do anything more with chip */
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		if (dd->pport[pidx].statusp)
			*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;

	if (!qib_wc_pat)
		qib_disable_wc(dd);

	if (dd->pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->pioavailregs_dma,
				  dd->pioavailregs_phys);
		dd->pioavailregs_dma = NULL;
	}

	if (dd->pageshadow) {
		struct page **tmpp = dd->pageshadow;
		dma_addr_t *tmpd = dd->physshadow;
		int i, cnt = 0;

		for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
			int ctxt_tidbase = ctxt * dd->rcvtidcnt;
			int maxtid = ctxt_tidbase + dd->rcvtidcnt;

			for (i = ctxt_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
				qib_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}

		tmpp = dd->pageshadow;
		dd->pageshadow = NULL;
		vfree(tmpp);
	}

	/*
	 * Free any resources still in use (usually just kernel contexts)
	 * at unload; we do for ctxtcnt, because that's what we allocate.
	 * We acquire lock to be really paranoid that rcd isn't being
	 * accessed from some interrupt-related code (that should not happen,
	 * but best to be sure).
	 */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	tmp = dd->rcd;
	dd->rcd = NULL;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
	for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) {
		struct qib_ctxtdata *rcd = tmp[ctxt];

		tmp[ctxt] = NULL; /* debugging paranoia */
		qib_free_ctxtdata(dd, rcd);
	}
	kfree(tmp);
	kfree(dd->boardname);
}

/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 */
static void qib_postinit_cleanup(struct qib_devdata *dd)
{
	/*
	 * Clean up chip-specific stuff.
	 * We check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.  Thus it's possible that
	 * the function pointers were never initialized.
	 */
	if (dd->f_cleanup)
		dd->f_cleanup(dd);

	qib_pcie_ddcleanup(dd);

	cleanup_device_data(dd);

	qib_free_devdata(dd);
}

static int __devinit qib_init_one(struct pci_dev *pdev,
				  const struct pci_device_id *ent)
{
	int ret, j, pidx, initfail;
	struct qib_devdata *dd = NULL;

	ret = qib_pcie_init(pdev, ent);
	if (ret)
		goto bail;

	/*
	 * Do device-specific initialization, function table setup, dd
	 * allocation, etc.
	 */
	switch (ent->device) {
	case PCI_DEVICE_ID_QLOGIC_IB_6120:
#ifdef CONFIG_PCI_MSI
		dd = qib_init_iba6120_funcs(pdev, ent);
#else
		qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
			      "work if CONFIG_PCI_MSI is not enabled\n",
			      ent->device);
		dd = ERR_PTR(-ENODEV);
#endif
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7220:
		dd = qib_init_iba7220_funcs(pdev, ent);
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7322:
		dd = qib_init_iba7322_funcs(pdev, ent);
		break;

	default:
		qib_early_err(&pdev->dev, "Failing on unknown QLogic "
			      "deviceid 0x%x\n", ent->device);
		ret = -ENODEV;
	}

	if (IS_ERR(dd))
		ret = PTR_ERR(dd);
	if (ret)
		goto bail; /* error already printed */

	/* do the generic initialization */
	initfail = qib_init(dd, 0);

	ret = qib_register_ib_device(dd);

	/*
	 * Now ready for use.  This should be cleared whenever we
	 * detect a reset, or initiate one.  If earlier failure,
	 * we still create devices, so diags, etc. can be used
	 * to determine cause of problem.
	 */
	if (!qib_mini_init && !initfail && !ret)
		dd->flags |= QIB_INITTED;

	j = qib_device_create(dd);
	if (j)
		qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
	j = qibfs_add(dd);
	if (j)
		qib_dev_err(dd, "Failed filesystem setup for counters: %d\n",
			    -j);

	if (qib_mini_init || initfail || ret) {
		qib_stop_timers(dd);
		flush_workqueue(ib_wq);
		for (pidx = 0; pidx < dd->num_pports; ++pidx)
			dd->f_quiet_serdes(dd->pport + pidx);
		if (qib_mini_init)
			goto bail;
		if (!j) {
			(void) qibfs_remove(dd);
			qib_device_remove(dd);
		}
		if (!ret)
			qib_unregister_ib_device(dd);
		qib_postinit_cleanup(dd);
		if (initfail)
			ret = initfail;
		goto bail;
	}

	if (!qib_wc_pat) {
		ret = qib_enable_wc(dd);
		if (ret) {
			qib_dev_err(dd, "Write combining not enabled "
				    "(err %d): performance may be poor\n",
				    -ret);
			ret = 0;
		}
	}

	qib_verify_pioperf(dd);
bail:
	return ret;
}

static void __devexit qib_remove_one(struct pci_dev *pdev)
{
	struct qib_devdata *dd = pci_get_drvdata(pdev);
	int ret;

	/* unregister from IB core */
	qib_unregister_ib_device(dd);

	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
	if (!qib_mini_init)
		qib_shutdown_device(dd);

	qib_stop_timers(dd);

	/* wait until all of our (qsfp) queue_work() calls complete */
	flush_workqueue(ib_wq);

	ret = qibfs_remove(dd);
	if (ret)
		qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n",
			    -ret);

	qib_device_remove(dd);

	qib_postinit_cleanup(dd);
}

/**
 * qib_create_rcvhdrq - create a receive header queue
 * @dd: the qlogic_ib device
 * @rcd: the context data
 *
 * This must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	unsigned amt;

	if (!rcd->rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags;

		amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
			    sizeof(u32), PAGE_SIZE);
		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
			GFP_USER : GFP_KERNEL;
		rcd->rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
			gfp_flags | __GFP_COMP);

		if (!rcd->rcvhdrq) {
			qib_dev_err(dd, "attempt to allocate %d bytes "
				    "for ctxt %u rcvhdrq failed\n",
				    amt, rcd->ctxt);
			goto bail;
		}

		if (rcd->ctxt >= dd->first_user_ctxt) {
			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
			if (!rcd->user_event_mask)
				goto bail_free_hdrq;
		}

		if (!(dd->flags & QIB_NODMA_RTAIL)) {
			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				gfp_flags);
			if (!rcd->rcvhdrtail_kvaddr)
				goto bail_free;
			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
		}

		rcd->rcvhdrq_size = amt;
	}

	/* clear for security and sanity on each use */
	memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
	if (rcd->rcvhdrtail_kvaddr)
		memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE);
	return 0;

bail_free:
	qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
		    "rcvhdrqtailaddr failed\n", rcd->ctxt);
	vfree(rcd->user_event_mask);
	rcd->user_event_mask = NULL;
bail_free_hdrq:
	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
			  rcd->rcvhdrq_phys);
	rcd->rcvhdrq = NULL;
bail:
	return -ENOMEM;
}

/**
 * qib_setup_eagerbufs - allocate eager buffers, both kernel and user contexts.
 * @rcd: the context we are setting up.
 *
 * Allocate the eager TID buffers and program them into the chip.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.  Otherwise we get the OOM code involved, by asking for too
 * much per call, with disastrous results on some kernels.
 */
int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
{
	struct qib_devdata *dd = rcd->dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	gfp_t gfp_flags;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = rcd->rcvegrcnt;
	egroff = rcd->rcvegr_tid_base;
	egrsize = dd->rcvegrbufsize;

	chunk = rcd->rcvegrbuf_chunks;
	egrperchunk = rcd->rcvegrbufs_perchunk;
	size = rcd->rcvegrbuf_size;
	if (!rcd->rcvegrbuf) {
		rcd->rcvegrbuf =
			kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf)
			goto bail;
	}
	if (!rcd->rcvegrbuf_phys) {
		rcd->rcvegrbuf_phys =
			kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf_phys)
			goto bail_rcvegrbuf;
	}
	for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
		if (rcd->rcvegrbuf[e])
			continue;
		rcd->rcvegrbuf[e] =
			dma_alloc_coherent(&dd->pcidev->dev, size,
					   &rcd->rcvegrbuf_phys[e],
					   gfp_flags);
		if (!rcd->rcvegrbuf[e])
			goto bail_rcvegrbuf_phys;
	}

	rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = rcd->rcvegrbuf_phys[chunk];
		unsigned i;

		/* clear for security and sanity on each use */
		memset(rcd->rcvegrbuf[chunk], 0, size);

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->f_put_tid(dd, e + egroff +
				      (u64 __iomem *)
				      ((char __iomem *)
				       dd->kregbase +
				       dd->rcvegrbase),
				      RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched(); /* don't hog the cpu */
	}

	return 0;

bail_rcvegrbuf_phys:
	for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]);
	kfree(rcd->rcvegrbuf_phys);
	rcd->rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(rcd->rcvegrbuf);
	rcd->rcvegrbuf = NULL;
bail:
	return -ENOMEM;
}

/*
 * Note: Changes to this routine should be mirrored
 * for the diagnostics routine qib_remap_ioaddr32().
 * There is also related code for VL15 buffers in qib_init_7322_variables().
 * The teardown code that unmaps is in qib_pcie_ddcleanup()
 */
int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
{
	u64 __iomem *qib_kregbase = NULL;
	void __iomem *qib_piobase = NULL;
	u64 __iomem *qib_userbase = NULL;
	u64 qib_kreglen;
	u64 qib_pio2koffset = dd->piobufbase & 0xffffffff;
	u64 qib_pio4koffset = dd->piobufbase >> 32;
	u64 qib_pio2klen = dd->piobcnt2k * dd->palign;
	u64 qib_pio4klen = dd->piobcnt4k * dd->align4k;
	u64 qib_physaddr = dd->physaddr;
	u64 qib_piolen;
	u64 qib_userlen = 0;

	/*
	 * Free the old mapping because the kernel will try to reuse the
	 * old mapping and not create a new mapping with the
	 * write combining attribute.
	 */
	iounmap(dd->kregbase);
	dd->kregbase = NULL;

	/*
	 * Assumes chip address space looks like:
	 *	- kregs + sregs + cregs + uregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 * or:
	 *	- kregs + sregs + cregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 *	- uregs
	 */
	if (dd->piobcnt4k == 0) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio2klen;
	} else if (qib_pio2koffset < qib_pio4koffset) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen;
	} else {
		qib_kreglen = qib_pio4koffset;
		qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen;
	}
	qib_piolen += vl15buflen;
	/* Map just the configured ports (not all hw ports) */
	if (dd->uregbase > qib_kreglen)
		qib_userlen = dd->ureg_align * dd->cfgctxts;

	/* Sanity checks passed, now create the new mappings */
	qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
	if (!qib_kregbase)
		goto bail;

	qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen);
	if (!qib_piobase)
		goto bail_kregbase;

	if (qib_userlen) {
		qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
					       qib_userlen);
		if (!qib_userbase)
			goto bail_piobase;
	}

	dd->kregbase = qib_kregbase;
	dd->kregend = (u64 __iomem *)
		((char __iomem *) qib_kregbase + qib_kreglen);
	dd->piobase = qib_piobase;
	dd->pio2kbase = (void __iomem *)
		(((char __iomem *) dd->piobase) +
		 qib_pio2koffset - qib_kreglen);
	if (dd->piobcnt4k)
		dd->pio4kbase = (void __iomem *)
			(((char __iomem *) dd->piobase) +
			 qib_pio4koffset - qib_kreglen);
	if (qib_userlen)
		/* ureg will now be accessed relative to dd->userbase */
		dd->userbase = qib_userbase;
	return 0;

bail_piobase:
	iounmap(qib_piobase);
bail_kregbase:
	iounmap(qib_kregbase);
bail:
	return -ENOMEM;
}