/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/idr.h>

#include "qib.h"
#include "qib_common.h"

/*
 * min buffers we want to have per context, after driver
 */
#define QIB_MIN_USER_CTXT_BUFCNT 7

#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF
#define QLOGIC_IB_R_SOFTWARE_SHIFT 24
#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)

/*
 * Number of ctxts we are configured to use (to allow for more pio
 * buffers per ctxt, etc.)  Zero means use chip value.
 */
ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");

/*
 * If set, do not write to any regs if avoidable, hack to allow
 * check for deranged default register values.
 */
ushort qib_mini_init;
module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");

unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");

/*
 * qib_wc_pat parameter:
 *      0 is WC via MTRR
 *      1 is WC via PAT
 *      If PAT initialization fails, code reverts back to MTRR
 */
unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");

struct workqueue_struct *qib_wq;
struct workqueue_struct *qib_cq_wq;

static void verify_interrupt(unsigned long);

static struct idr qib_unit_table;
u32 qib_cpulist_count;
unsigned long *qib_cpulist;

/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
	if (!qib_cfgctxts)
		dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
	else if (qib_cfgctxts < dd->num_pports)
		dd->cfgctxts = dd->ctxtcnt;
	else if (qib_cfgctxts <= dd->ctxtcnt)
		dd->cfgctxts = qib_cfgctxts;
	else
		dd->cfgctxts = dd->ctxtcnt;
}

/*
 * Common code for creating the receive context array.
 */
int qib_create_ctxts(struct qib_devdata *dd)
{
	unsigned i;
	int ret;

	/*
	 * Allocate full ctxtcnt array, rather than just cfgctxts, because
	 * cleanup iterates across all possible ctxts.
	 */
	dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
	if (!dd->rcd) {
		qib_dev_err(dd, "Unable to allocate ctxtdata array, "
			    "failing\n");
		ret = -ENOMEM;
		goto done;
	}

	/* create (one or more) kctxt */
	for (i = 0; i < dd->first_user_ctxt; ++i) {
		struct qib_pportdata *ppd;
		struct qib_ctxtdata *rcd;

		if (dd->skip_kctxt_mask & (1 << i))
			continue;

		ppd = dd->pport + (i % dd->num_pports);
		rcd = qib_create_ctxtdata(ppd, i);
		if (!rcd) {
			qib_dev_err(dd, "Unable to allocate ctxtdata"
				    " for Kernel ctxt, failing\n");
			ret = -ENOMEM;
			goto done;
		}
		rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
		rcd->seq_cnt = 1;
	}
	ret = 0;
done:
	return ret;
}

/*
 * Common code for user and kernel context setup.
 */
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
{
	struct qib_devdata *dd = ppd->dd;
	struct qib_ctxtdata *rcd;

	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
	if (rcd) {
		INIT_LIST_HEAD(&rcd->qp_wait_list);
		rcd->ppd = ppd;
		rcd->dd = dd;
		rcd->cnt = 1;
		rcd->ctxt = ctxt;
		dd->rcd[ctxt] = rcd;

		dd->f_init_ctxt(rcd);

		/*
		 * To avoid wasting a lot of memory, we allocate 32KB chunks
		 * of physically contiguous memory, advance through it until
		 * used up and then allocate more.  Of course, we need
		 * memory to store those extra pointers, now.  32KB seems to
		 * be the most that is "safe" under memory pressure
		 * (creating large files and then copying them over
		 * NFS while doing lots of MPI jobs).  The OOM killer can
		 * get invoked, even though we say we can sleep and this can
		 * cause significant system problems....
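		 * (The 32KB chunk size is fixed just below; the chunk count
		 * is then derived from rcvegrcnt and the per-chunk count.)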
		 */
		rcd->rcvegrbuf_size = 0x8000;
		rcd->rcvegrbufs_perchunk =
			rcd->rcvegrbuf_size / dd->rcvegrbufsize;
		rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
			rcd->rcvegrbufs_perchunk - 1) /
			rcd->rcvegrbufs_perchunk;
	}
	return rcd;
}

/*
 * Common code for initializing the physical port structure.
 */
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
			u8 hw_pidx, u8 port)
{
	ppd->dd = dd;
	ppd->hw_pidx = hw_pidx;
	ppd->port = port; /* IB port number, not index */

	spin_lock_init(&ppd->sdma_lock);
	spin_lock_init(&ppd->lflags_lock);
	init_waitqueue_head(&ppd->state_wait);

	init_timer(&ppd->symerr_clear_timer);
	ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
	ppd->symerr_clear_timer.data = (unsigned long)ppd;
}

static int init_pioavailregs(struct qib_devdata *dd)
{
	int ret, pidx;
	u64 *status_page;

	dd->pioavailregs_dma = dma_alloc_coherent(
		&dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
		GFP_KERNEL);
	if (!dd->pioavailregs_dma) {
		qib_dev_err(dd, "failed to allocate PIOavail reg area "
			    "in memory\n");
		ret = -ENOMEM;
		goto done;
	}

	/*
	 * We really want L2 cache aligned, but for current CPUs of
	 * interest, they are the same.
	 */
	status_page = (u64 *)
		((char *) dd->pioavailregs_dma +
		 ((2 * L1_CACHE_BYTES +
		   dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
	/* device status comes first, for backwards compatibility */
	dd->devstatusp = status_page;
	*status_page++ = 0;
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		dd->pport[pidx].statusp = status_page;
		*status_page++ = 0;
	}

	/*
	 * Setup buffer to hold freeze and other messages, accessible to
	 * apps, following statusp.  This is per-unit, not per port.
	 */
	dd->freezemsg = (char *) status_page;
	*dd->freezemsg = 0;
	/* length of msg buffer is "whatever is left" */
	ret = (char *) status_page - (char *) dd->pioavailregs_dma;
	dd->freezelen = PAGE_SIZE - ret;

	ret = 0;

done:
	return ret;
}

/**
 * init_shadow_tids - allocate the shadow TID array
 * @dd: the qlogic_ib device
 *
 * allocate the shadow TID array, so we can qib_munlock previous
 * entries.  It may make more sense to move the pageshadow to the
 * ctxt data structure, so we only allocate memory for ctxts actually
 * in use, since we are at 8k per ctxt now.
 * We don't want failures here to prevent use of the driver/chip,
 * so no return value.
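 * On failure, dd->pageshadow is simply left NULL (see the "no expected
 * sends" messages below) and initialization continues.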
 */
static void init_shadow_tids(struct qib_devdata *dd)
{
	struct page **pages;
	dma_addr_t *addrs;

	pages = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
	if (!pages) {
		qib_dev_err(dd, "failed to allocate shadow page * "
			    "array, no expected sends!\n");
		goto bail;
	}

	addrs = vmalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
	if (!addrs) {
		qib_dev_err(dd, "failed to allocate shadow dma handle "
			    "array, no expected sends!\n");
		goto bail_free;
	}

	memset(pages, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
	memset(addrs, 0, dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));

	dd->pageshadow = pages;
	dd->physshadow = addrs;
	return;

bail_free:
	vfree(pages);
bail:
	dd->pageshadow = NULL;
}

/*
 * Do initialization for device that is only needed on
 * first detect, not on resets.
 */
static int loadtime_init(struct qib_devdata *dd)
{
	int ret = 0;

	if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
	     QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
		qib_dev_err(dd, "Driver only handles version %d, "
			    "chip swversion is %d (%llx), failing\n",
			    QIB_CHIP_SWVERSION,
			    (int)(dd->revision >>
				  QLOGIC_IB_R_SOFTWARE_SHIFT) &
			    QLOGIC_IB_R_SOFTWARE_MASK,
			    (unsigned long long) dd->revision);
		ret = -ENOSYS;
		goto done;
	}

	if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK)
		qib_devinfo(dd->pcidev, "%s", dd->boardversion);

	spin_lock_init(&dd->pioavail_lock);
	spin_lock_init(&dd->sendctrl_lock);
	spin_lock_init(&dd->uctxt_lock);
	spin_lock_init(&dd->qib_diag_trans_lock);
	spin_lock_init(&dd->eep_st_lock);
	mutex_init(&dd->eep_lock);

	if (qib_mini_init)
		goto done;

	ret = init_pioavailregs(dd);
	init_shadow_tids(dd);

	qib_get_eeprom_info(dd);

	/* setup time (don't start yet) to verify we got interrupt */
	init_timer(&dd->intrchk_timer);
	dd->intrchk_timer.function = verify_interrupt;
	dd->intrchk_timer.data = (unsigned long) dd;

done:
	return ret;
}

/**
 * init_after_reset - re-initialize after a reset
 * @dd: the qlogic_ib device
 *
 * sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed)
 */
static int init_after_reset(struct qib_devdata *dd)
{
	int i;

	/*
	 * Ensure chip does no sends or receives, tail updates, or
	 * pioavail updates while we re-initialize.  This is mostly
	 * for the driver data structures, not chip registers.
	 */
	for (i = 0; i < dd->num_pports; ++i) {
		/*
		 * ctxt == -1 means "all contexts". Only really safe for
		 * _dis_abling things, as here.
		 */
		dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_TAILUPD_DIS, -1);
		/* Redundant across ports for some, but no big deal. */
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS |
			       QIB_SENDCTRL_AVAIL_DIS);
	}

	return 0;
}

static void enable_chip(struct qib_devdata *dd)
{
	u64 rcvmask;
	int i;

	/*
	 * Enable PIO send, and update of PIOavail regs to memory.
	 */
	for (i = 0; i < dd->num_pports; ++i)
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB |
			       QIB_SENDCTRL_AVAIL_ENB);
	/*
	 * Enable kernel ctxts' receive and receive interrupt.
	 * Other ctxts done as user opens and inits them.
	 */
	rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB;
	rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ?
		QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB;
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		struct qib_ctxtdata *rcd = dd->rcd[i];

		if (rcd)
			dd->f_rcvctrl(rcd->ppd, rcvmask, i);
	}
}

static void verify_interrupt(unsigned long opaque)
{
	struct qib_devdata *dd = (struct qib_devdata *) opaque;

	if (!dd)
		return; /* being torn down */

	/*
	 * If we don't have a lid or any interrupts, let the user know and
	 * don't bother checking again.
	 */
	if (dd->int_counter == 0) {
		if (!dd->f_intr_fallback(dd))
			dev_err(&dd->pcidev->dev, "No interrupts detected, "
				"not usable.\n");
		else /* re-arm the timer to see if fallback works */
			mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
	}
}

static void init_piobuf_state(struct qib_devdata *dd)
{
	int i, pidx;
	u32 uctxts;

	/*
	 * Ensure all buffers are free, and fifos empty.  Buffers
	 * are common, so only do once for port 0.
	 *
	 * After enable and qib_chg_pioavailkernel so we can safely
	 * enable pioavail updates and PIOENABLE.  After this, packets
	 * are ready and able to go out.
	 */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL);
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH);

	/*
	 * If not all sendbufs are used, add the one to each of the lower
	 * numbered contexts.  pbufsctxt and lastctxt_piobuf are
	 * calculated in chip-specific code because it may cause some
	 * chip-specific adjustments to be made.
	 */
	uctxts = dd->cfgctxts - dd->first_user_ctxt;
	dd->ctxts_extrabuf = dd->pbufsctxt ?
		dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0;

	/*
	 * Set up the shadow copies of the piobufavail registers,
	 * which we compare against the chip registers for now, and
	 * the in memory DMA'ed copies of the registers.
	 * By now pioavail updates to memory should have occurred, so
	 * copy them into our working/shadow registers; this is in
	 * case something went wrong with abort, but mostly to get the
	 * initial values of the generation bit correct.
	 */
	for (i = 0; i < dd->pioavregs; i++) {
		__le64 tmp;

		tmp = dd->pioavailregs_dma[i];
		/*
		 * Don't need to worry about pioavailkernel here
		 * because we will call qib_chg_pioavailkernel() later
		 * in initialization, to busy out buffers as needed.
		 */
		dd->pioavailshadow[i] = le64_to_cpu(tmp);
	}
	while (i < ARRAY_SIZE(dd->pioavailshadow))
		dd->pioavailshadow[i++] = 0; /* for debugging sanity */

	/* after pioavailshadow is setup */
	qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k,
			       TXCHK_CHG_TYPE_KERN, NULL);
	dd->f_initvl15_bufs(dd);
}

/**
 * qib_init - do the actual initialization sequence on the chip
 * @dd: the qlogic_ib device
 * @reinit: reinitializing, so don't allocate new memory
 *
 * Do the actual initialization sequence on the chip.  This is done
 * both from the init routine called from the PCI infrastructure, and
 * when we reset the chip, or detect that it was reset internally,
 * or it's administratively re-enabled.
 *
 * Memory allocation here and in called routines is only done in
 * the first case (reinit == 0).
 * We have to be careful, because even
 * without memory allocation, we need to re-write all the chip registers
 * TIDs, etc. after the reset or enable has completed.
 */
int qib_init(struct qib_devdata *dd, int reinit)
{
	int ret = 0, pidx, lastfail = 0;
	u32 portok = 0;
	unsigned i;
	struct qib_ctxtdata *rcd;
	struct qib_pportdata *ppd;
	unsigned long flags;

	/* Set linkstate to unknown, so we can watch for a transition. */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED |
				 QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKV);
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
	}

	if (reinit)
		ret = init_after_reset(dd);
	else
		ret = loadtime_init(dd);
	if (ret)
		goto done;

	/* Bypass most chip-init, to get to device creation */
	if (qib_mini_init)
		return 0;

	ret = dd->f_late_initreg(dd);
	if (ret)
		goto done;

	/* dd->rcd can be NULL if early init failed */
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		/*
		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
		 * re-init, the simplest way to handle this is to free
		 * existing, and re-allocate.
		 * Need to re-create rest of ctxt 0 ctxtdata as well.
		 */
		rcd = dd->rcd[i];
		if (!rcd)
			continue;

		lastfail = qib_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = qib_setup_eagerbufs(rcd);
		if (lastfail) {
			qib_dev_err(dd, "failed to allocate kernel ctxt's "
				    "rcvhdrq and/or egr bufs\n");
			continue;
		}
	}

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		int mtu;
		if (lastfail)
			ret = lastfail;
		ppd = dd->pport + pidx;
		mtu = ib_mtu_enum_to_int(qib_ibmtu);
		if (mtu == -1) {
			mtu = QIB_DEFAULT_MTU;
			qib_ibmtu = 0; /* don't leave invalid value */
		}
		/* set max we can ever have for this driver load */
		ppd->init_ibmaxlen = min(mtu > 2048 ?
					 dd->piosize4k : dd->piosize2k,
					 dd->rcvegrbufsize +
					 (dd->rcvhdrentsize << 2));
		/*
		 * Have to initialize ibmaxlen, but this will normally
		 * change immediately in qib_set_mtu().
		 */
		ppd->ibmaxlen = ppd->init_ibmaxlen;
		qib_set_mtu(ppd, mtu);

		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags |= QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);

		lastfail = dd->f_bringup_serdes(ppd);
		if (lastfail) {
			qib_devinfo(dd->pcidev,
				    "Failed to bringup IB port %u\n", ppd->port);
			lastfail = -ENETDOWN;
			continue;
		}

		/* let link come up, and enable IBC */
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
		portok++;
	}

	if (!portok) {
		/* none of the ports initialized */
		if (!ret && lastfail)
			ret = lastfail;
		else if (!ret)
			ret = -ENETDOWN;
		/* but continue on, so we can debug cause */
	}

	enable_chip(dd);

	init_piobuf_state(dd);

done:
	if (!ret) {
		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			*ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
				QIB_STATUS_INITTED;
			if (!ppd->link_speed_enabled)
				continue;
			if (dd->flags & QIB_HAS_SEND_DMA)
				ret = qib_setup_sdma(ppd);
			init_timer(&ppd->hol_timer);
			ppd->hol_timer.function = qib_hol_event;
			ppd->hol_timer.data = (unsigned long)ppd;
			ppd->hol_state = QIB_HOL_UP;
		}

		/* now we can enable all interrupts from the chip */
		dd->f_set_intr_state(dd, 1);

		/*
		 * Setup to verify we get an interrupt, and fallback
		 * to an alternate if necessary and possible.
		 */
		mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
		/* start stats retrieval timer */
		mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
{
}

static inline struct qib_devdata *__qib_lookup(int unit)
{
	return idr_find(&qib_unit_table, unit);
}

struct qib_devdata *qib_lookup(int unit)
{
	struct qib_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	dd = __qib_lookup(unit);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	return dd;
}

/*
 * Stop the timers during unit shutdown, or after an error late
 * in initialization.
 */
static void qib_stop_timers(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	int pidx;

	if (dd->stats_timer.data) {
		del_timer_sync(&dd->stats_timer);
		dd->stats_timer.data = 0;
	}
	if (dd->intrchk_timer.data) {
		del_timer_sync(&dd->intrchk_timer);
		dd->intrchk_timer.data = 0;
	}
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (ppd->hol_timer.data)
			del_timer_sync(&ppd->hol_timer);
		if (ppd->led_override_timer.data) {
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}
		if (ppd->symerr_clear_timer.data)
			del_timer_sync(&ppd->symerr_clear_timer);
	}
}

/**
 * qib_shutdown_device - shut down a device
 * @dd: the qlogic_ib device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled.  It does not free any data structures.
 * Everything it does has to be setup again by qib_init(dd, 1)
 */
static void qib_shutdown_device(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	unsigned pidx;

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		spin_lock_irq(&ppd->lflags_lock);
		ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKARMED | QIBL_LINKACTIVE |
				 QIBL_LINKV);
		spin_unlock_irq(&ppd->lflags_lock);
		*ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
	}
	dd->flags &= ~QIB_INITTED;

	/* mask interrupts, but not errors */
	dd->f_set_intr_state(dd, 0);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
			      QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_PKEY_ENB, -1);
		/*
		 * Gracefully stop all sends allowing any in progress to
		 * trickle out first.
		 */
		dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
	}

	/*
	 * Enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(20);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_setextled(ppd, 0); /* make sure LEDs are off */

		if (dd->flags & QIB_HAS_SEND_DMA)
			qib_teardown_sdma(ppd);

		dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
			       QIB_SENDCTRL_SEND_DIS);
		/*
		 * Clear SerdesEnable.
		 * We can't count on interrupts since we are stopping.
		 */
		dd->f_quiet_serdes(ppd);
	}

	qib_update_eeprom_log(dd);
}

/**
 * qib_free_ctxtdata - free a context's allocated data
 * @dd: the qlogic_ib device
 * @rcd: the ctxtdata structure
 *
 * free up any allocated data for a context
 * This should not touch anything that would affect a simultaneous
 * re-allocation of context data, because it is called after qib_mutex
 * is released (and can be called from reinit as well).
 * It should never change any chip state, or global driver state.
 */
void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	if (!rcd)
		return;

	if (rcd->rcvhdrq) {
		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
				  rcd->rcvhdrq, rcd->rcvhdrq_phys);
		rcd->rcvhdrq = NULL;
		if (rcd->rcvhdrtail_kvaddr) {
			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
					  rcd->rcvhdrtail_kvaddr,
					  rcd->rcvhdrqtailaddr_phys);
			rcd->rcvhdrtail_kvaddr = NULL;
		}
	}
	if (rcd->rcvegrbuf) {
		unsigned e;

		for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
			void *base = rcd->rcvegrbuf[e];
			size_t size = rcd->rcvegrbuf_size;

			dma_free_coherent(&dd->pcidev->dev, size,
					  base, rcd->rcvegrbuf_phys[e]);
		}
		kfree(rcd->rcvegrbuf);
		rcd->rcvegrbuf = NULL;
		kfree(rcd->rcvegrbuf_phys);
		rcd->rcvegrbuf_phys = NULL;
		rcd->rcvegrbuf_chunks = 0;
	}

	kfree(rcd->tid_pg_list);
	vfree(rcd->user_event_mask);
	vfree(rcd->subctxt_uregbase);
	vfree(rcd->subctxt_rcvegrbuf);
	vfree(rcd->subctxt_rcvhdr_base);
	kfree(rcd);
}

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).  On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void qib_verify_pioperf(struct qib_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum);
	if (!piobuf) {
		qib_devinfo(dd->pcidev,
			    "No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		qib_devinfo(dd->pcidev,
			    "Couldn't get memory for checking PIO perf,"
			    " skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	dd->f_set_armlaunch(dd, 0);

	/*
	 * length 0, no dwords actually sent
	 */
	writeq(0, piobuf);
	qib_flush_wc();

	/*
	 * This is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.  Running for
	 * >= 5 msec seems to get us "close enough" to accurate values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		qib_pio_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		qib_dev_err(dd,
			    "Performance problem: bandwidth to PIO buffers is "
			    "only %u MiB/sec\n",
			    lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
	qib_sendbuf_done(dd, pbnum);
	dd->f_set_armlaunch(dd, 1);
}


void qib_free_devdata(struct qib_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	idr_remove(&qib_unit_table, dd->unit);
	list_del(&dd->list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	ib_dealloc_device(&dd->verbs_dev.ibdev);
}

/*
 * Allocate our primary per-unit data structure.  Must be done via verbs
 * allocator, because the verbs cleanup process both does cleanup and
 * free of the data structure.
 * "extra" is for chip-specific data.
 *
 * Use the idr mechanism to get a unit number for this unit.
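 * On any failure this returns an ERR_PTR() value rather than NULL, so
 * callers are expected to check the result with IS_ERR().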
 */
struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
	unsigned long flags;
	struct qib_devdata *dd;
	int ret;

	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&qib_devs_lock, flags);
	ret = idr_get_new(&qib_unit_table, dd, &dd->unit);
	if (ret >= 0)
		list_add(&dd->list, &qib_dev_list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	if (ret < 0) {
		qib_early_err(&pdev->dev,
			      "Could not allocate unit ID: error %d\n", -ret);
		ib_dealloc_device(&dd->verbs_dev.ibdev);
		dd = ERR_PTR(ret);
		goto bail;
	}

	if (!qib_cpulist_count) {
		u32 count = num_online_cpus();
		qib_cpulist = kzalloc(BITS_TO_LONGS(count) *
				      sizeof(long), GFP_KERNEL);
		if (qib_cpulist)
			qib_cpulist_count = count;
		else
			qib_early_err(&pdev->dev, "Could not alloc cpulist "
				      "info, cpu affinity might be wrong\n");
	}

bail:
	return dd;
}

/*
 * Called from freeze mode handlers, and from PCI error
 * reporting code.  Should be paranoid about state of
 * system and data structures.
 */
void qib_disable_after_error(struct qib_devdata *dd)
{
	if (dd->flags & QIB_INITTED) {
		u32 pidx;

		dd->flags &= ~QIB_INITTED;
		if (dd->pport)
			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
				struct qib_pportdata *ppd;

				ppd = dd->pport + pidx;
				if (dd->flags & QIB_PRESENT) {
					qib_set_linkstate(ppd,
						QIB_IB_LINKDOWN_DISABLE);
					dd->f_setextled(ppd, 0);
				}
				*ppd->statusp &= ~QIB_STATUS_IB_READY;
			}
	}

	/*
	 * Mark as having had an error for driver, and also
	 * for /sys and status word mapped to user programs.
	 * This marks unit as not usable, until reset.
	 */
	if (dd->devstatusp)
		*dd->devstatusp |= QIB_STATUS_HWERROR;
}

static void __devexit qib_remove_one(struct pci_dev *);
static int __devinit qib_init_one(struct pci_dev *,
				  const struct pci_device_id *);

#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "

static const struct pci_device_id qib_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, qib_pci_tbl);

struct pci_driver qib_driver = {
	.name = QIB_DRV_NAME,
	.probe = qib_init_one,
	.remove = __devexit_p(qib_remove_one),
	.id_table = qib_pci_tbl,
	.err_handler = &qib_pci_err_handler,
};

/*
 * Do all the generic driver unit- and chip-independent memory
 * allocation and initialization.
 */
static int __init qlogic_ib_init(void)
{
	int ret;

	ret = qib_dev_init();
	if (ret)
		goto bail;

	/*
	 * We create our own workqueue mainly because we want to be
	 * able to flush it when devices are being removed.  We can't
	 * use schedule_work()/flush_scheduled_work() because both
	 * unregister_netdev() and linkwatch_event take the rtnl lock,
	 * so flush_scheduled_work() can deadlock during device
	 * removal.
	 */
	qib_wq = create_workqueue("qib");
	if (!qib_wq) {
		ret = -ENOMEM;
		goto bail_dev;
	}

	qib_cq_wq = create_singlethread_workqueue("qib_cq");
	if (!qib_cq_wq) {
		ret = -ENOMEM;
		goto bail_wq;
	}

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&qib_unit_table);
	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
		ret = -ENOMEM;
		goto bail_cq_wq;
	}

	ret = pci_register_driver(&qib_driver);
	if (ret < 0) {
		printk(KERN_ERR QIB_DRV_NAME
		       ": Unable to register driver: error %d\n", -ret);
		goto bail_unit;
	}

	/* not fatal if it doesn't work */
	if (qib_init_qibfs())
		printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n");
	goto bail; /* all OK */

bail_unit:
	idr_destroy(&qib_unit_table);
bail_cq_wq:
	destroy_workqueue(qib_cq_wq);
bail_wq:
	destroy_workqueue(qib_wq);
bail_dev:
	qib_dev_cleanup();
bail:
	return ret;
}

module_init(qlogic_ib_init);

/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 */
static void __exit qlogic_ib_cleanup(void)
{
	int ret;

	ret = qib_exit_qibfs();
	if (ret)
		printk(KERN_ERR QIB_DRV_NAME ": "
		       "Unable to cleanup counter filesystem: "
		       "error %d\n", -ret);

	pci_unregister_driver(&qib_driver);

	destroy_workqueue(qib_wq);
	destroy_workqueue(qib_cq_wq);

	qib_cpulist_count = 0;
	kfree(qib_cpulist);

	idr_destroy(&qib_unit_table);
	qib_dev_cleanup();
}

module_exit(qlogic_ib_cleanup);

/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
{
	int ctxt;
	int pidx;
	struct qib_ctxtdata **tmp;
	unsigned long flags;

	/* users can't do anything more with chip */
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		if (dd->pport[pidx].statusp)
			*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;

	if (!qib_wc_pat)
		qib_disable_wc(dd);

	if (dd->pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->pioavailregs_dma,
				  dd->pioavailregs_phys);
		dd->pioavailregs_dma = NULL;
	}

	if (dd->pageshadow) {
		struct page **tmpp = dd->pageshadow;
		dma_addr_t *tmpd = dd->physshadow;
		int i, cnt = 0;

		for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
			int ctxt_tidbase = ctxt * dd->rcvtidcnt;
			int maxtid = ctxt_tidbase + dd->rcvtidcnt;

			for (i = ctxt_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
				qib_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}

		tmpp = dd->pageshadow;
		dd->pageshadow = NULL;
		vfree(tmpp);
	}

	/*
	 * Free any resources still in use (usually just kernel contexts)
	 * at unload; we do for ctxtcnt, because that's what we allocate.
	 * We acquire lock to be really paranoid that rcd isn't being
	 * accessed from some interrupt-related code (that should not happen,
	 * but best to be sure).
	 */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	tmp = dd->rcd;
	dd->rcd = NULL;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
	for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) {
		struct qib_ctxtdata *rcd = tmp[ctxt];

		tmp[ctxt] = NULL; /* debugging paranoia */
		qib_free_ctxtdata(dd, rcd);
	}
	kfree(tmp);
	kfree(dd->boardname);
}

/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 */
static void qib_postinit_cleanup(struct qib_devdata *dd)
{
	/*
	 * Clean up chip-specific stuff.
	 * We check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.  Thus it's possible that
	 * the function pointers were never initialized.
	 */
	if (dd->f_cleanup)
		dd->f_cleanup(dd);

	qib_pcie_ddcleanup(dd);

	cleanup_device_data(dd);

	qib_free_devdata(dd);
}

static int __devinit qib_init_one(struct pci_dev *pdev,
				  const struct pci_device_id *ent)
{
	int ret, j, pidx, initfail;
	struct qib_devdata *dd = NULL;

	ret = qib_pcie_init(pdev, ent);
	if (ret)
		goto bail;

	/*
	 * Do device-specific initialization, function table setup, dd
	 * allocation, etc.
	 */
	switch (ent->device) {
	case PCI_DEVICE_ID_QLOGIC_IB_6120:
#ifdef CONFIG_PCI_MSI
		dd = qib_init_iba6120_funcs(pdev, ent);
#else
		qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
			      "work if CONFIG_PCI_MSI is not enabled\n",
			      ent->device);
		dd = ERR_PTR(-ENODEV);
#endif
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7220:
		dd = qib_init_iba7220_funcs(pdev, ent);
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7322:
		dd = qib_init_iba7322_funcs(pdev, ent);
		break;

	default:
		qib_early_err(&pdev->dev, "Failing on unknown QLogic "
			      "deviceid 0x%x\n", ent->device);
		ret = -ENODEV;
	}

	if (IS_ERR(dd))
		ret = PTR_ERR(dd);
	if (ret)
		goto bail; /* error already printed */

	/* do the generic initialization */
	initfail = qib_init(dd, 0);

	ret = qib_register_ib_device(dd);

	/*
	 * Now ready for use.  this should be cleared whenever we
	 * detect a reset, or initiate one.  If earlier failure,
	 * we still create devices, so diags, etc. can be used
	 * to determine cause of problem.
	 */
	if (!qib_mini_init && !initfail && !ret)
		dd->flags |= QIB_INITTED;

	j = qib_device_create(dd);
	if (j)
		qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
	j = qibfs_add(dd);
	if (j)
		qib_dev_err(dd, "Failed filesystem setup for counters: %d\n",
			    -j);

	if (qib_mini_init || initfail || ret) {
		qib_stop_timers(dd);
		flush_scheduled_work();
		for (pidx = 0; pidx < dd->num_pports; ++pidx)
			dd->f_quiet_serdes(dd->pport + pidx);
		if (qib_mini_init)
			goto bail;
		if (!j) {
			(void) qibfs_remove(dd);
			qib_device_remove(dd);
		}
		if (!ret)
			qib_unregister_ib_device(dd);
		qib_postinit_cleanup(dd);
		if (initfail)
			ret = initfail;
		goto bail;
	}

	if (!qib_wc_pat) {
		ret = qib_enable_wc(dd);
		if (ret) {
			qib_dev_err(dd, "Write combining not enabled "
				    "(err %d): performance may be poor\n",
				    -ret);
			ret = 0;
		}
	}

	qib_verify_pioperf(dd);
bail:
	return ret;
}

static void __devexit qib_remove_one(struct pci_dev *pdev)
{
	struct qib_devdata *dd = pci_get_drvdata(pdev);
	int ret;

	/* unregister from IB core */
	qib_unregister_ib_device(dd);

	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
	if (!qib_mini_init)
		qib_shutdown_device(dd);

	qib_stop_timers(dd);

	/* wait until all of our (qsfp) schedule_work() calls complete */
	flush_scheduled_work();

	ret = qibfs_remove(dd);
	if (ret)
		qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n",
			    -ret);

	qib_device_remove(dd);

	qib_postinit_cleanup(dd);
}

/**
 * qib_create_rcvhdrq - create a receive header queue
 * @dd: the qlogic_ib device
 * @rcd: the context data
 *
 * This must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	unsigned amt;

	if (!rcd->rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags;

		amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
			    sizeof(u32), PAGE_SIZE);
		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
			GFP_USER : GFP_KERNEL;
		rcd->rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
			gfp_flags | __GFP_COMP);

		if (!rcd->rcvhdrq) {
			qib_dev_err(dd, "attempt to allocate %d bytes "
				    "for ctxt %u rcvhdrq failed\n",
				    amt, rcd->ctxt);
			goto bail;
		}

		if (rcd->ctxt >= dd->first_user_ctxt) {
			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
			if (!rcd->user_event_mask)
				goto bail_free_hdrq;
		}

		if (!(dd->flags & QIB_NODMA_RTAIL)) {
			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				gfp_flags);
			if (!rcd->rcvhdrtail_kvaddr)
				goto bail_free;
			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
		}

		rcd->rcvhdrq_size = amt;
	}

	/* clear for security and sanity on each use */
	memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
	if (rcd->rcvhdrtail_kvaddr)
		memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE);
	return 0;

bail_free:
	qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
		    "rcvhdrqtailaddr failed\n", rcd->ctxt);
	vfree(rcd->user_event_mask);
	rcd->user_event_mask = NULL;
bail_free_hdrq:
	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
			  rcd->rcvhdrq_phys);
	rcd->rcvhdrq = NULL;
bail:
	return -ENOMEM;
}

/**
 * qib_setup_eagerbufs - allocate eager buffers, both kernel and user contexts.
 * @rcd: the context we are setting up.
 *
 * Allocate the eager TID buffers and program them into the chip.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.  Otherwise we get the OOM code involved, by asking for too
 * much per call, with disastrous results on some kernels.
 */
int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
{
	struct qib_devdata *dd = rcd->dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	gfp_t gfp_flags;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = rcd->rcvegrcnt;
	egroff = rcd->rcvegr_tid_base;
	egrsize = dd->rcvegrbufsize;

	chunk = rcd->rcvegrbuf_chunks;
	egrperchunk = rcd->rcvegrbufs_perchunk;
	size = rcd->rcvegrbuf_size;
	if (!rcd->rcvegrbuf) {
		rcd->rcvegrbuf =
			kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf)
			goto bail;
	}
	if (!rcd->rcvegrbuf_phys) {
		rcd->rcvegrbuf_phys =
			kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf_phys)
			goto bail_rcvegrbuf;
	}
	for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
		if (rcd->rcvegrbuf[e])
			continue;
		rcd->rcvegrbuf[e] =
			dma_alloc_coherent(&dd->pcidev->dev, size,
					   &rcd->rcvegrbuf_phys[e],
					   gfp_flags);
		if (!rcd->rcvegrbuf[e])
			goto bail_rcvegrbuf_phys;
	}

	rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = rcd->rcvegrbuf_phys[chunk];
		unsigned i;

		/* clear for security and sanity on each use */
		memset(rcd->rcvegrbuf[chunk], 0, size);

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->f_put_tid(dd, e + egroff +
				      (u64 __iomem *)
				      ((char __iomem *)
				       dd->kregbase +
				       dd->rcvegrbase),
				      RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched(); /* don't hog the cpu */
	}

	return 0;

bail_rcvegrbuf_phys:
	for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]);
	kfree(rcd->rcvegrbuf_phys);
	rcd->rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(rcd->rcvegrbuf);
	rcd->rcvegrbuf = NULL;
bail:
	return -ENOMEM;
}

/*
 * Note: Changes to this routine should be mirrored
 * for the diagnostics routine qib_remap_ioaddr32().
 * There is also related code for VL15 buffers in qib_init_7322_variables().
 * The teardown code that unmaps is in qib_pcie_ddcleanup()
 */
int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
{
	u64 __iomem *qib_kregbase = NULL;
	void __iomem *qib_piobase = NULL;
	u64 __iomem *qib_userbase = NULL;
	u64 qib_kreglen;
	u64 qib_pio2koffset = dd->piobufbase & 0xffffffff;
	u64 qib_pio4koffset = dd->piobufbase >> 32;
	u64 qib_pio2klen = dd->piobcnt2k * dd->palign;
	u64 qib_pio4klen = dd->piobcnt4k * dd->align4k;
	u64 qib_physaddr = dd->physaddr;
	u64 qib_piolen;
	u64 qib_userlen = 0;

	/*
	 * Free the old mapping because the kernel will try to reuse the
	 * old mapping and not create a new mapping with the
	 * write combining attribute.
	 */
	iounmap(dd->kregbase);
	dd->kregbase = NULL;

	/*
	 * Assumes chip address space looks like:
	 *	- kregs + sregs + cregs + uregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 * or:
	 *	- kregs + sregs + cregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 *	- uregs
	 */
	if (dd->piobcnt4k == 0) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio2klen;
	} else if (qib_pio2koffset < qib_pio4koffset) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen;
	} else {
		qib_kreglen = qib_pio4koffset;
		qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen;
	}
	qib_piolen += vl15buflen;
	/* Map just the configured ports (not all hw ports) */
	if (dd->uregbase > qib_kreglen)
		qib_userlen = dd->ureg_align * dd->cfgctxts;

	/* Sanity checks passed, now create the new mappings */
	qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
	if (!qib_kregbase)
		goto bail;

	qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen);
	if (!qib_piobase)
		goto bail_kregbase;

	if (qib_userlen) {
		qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
					       qib_userlen);
		if (!qib_userbase)
			goto bail_piobase;
	}

	dd->kregbase = qib_kregbase;
	dd->kregend = (u64 __iomem *)
		((char __iomem *) qib_kregbase + qib_kreglen);
	dd->piobase = qib_piobase;
	dd->pio2kbase = (void __iomem *)
		(((char __iomem *) dd->piobase) +
		 qib_pio2koffset - qib_kreglen);
	if (dd->piobcnt4k)
		dd->pio4kbase = (void __iomem *)
			(((char __iomem *) dd->piobase) +
			 qib_pio4koffset - qib_kreglen);
	if (qib_userlen)
		/* ureg will now be accessed relative to dd->userbase */
		dd->userbase = qib_userbase;
	return 0;

bail_piobase:
	iounmap(qib_piobase);
bail_kregbase:
	iounmap(qib_kregbase);
bail:
	return -ENOMEM;
}