/*
** IA64 System Bus Adapter (SBA) I/O MMU manager
**
** (c) Copyright 2002-2005 Alex Williamson
** (c) Copyright 2002-2003 Grant Grundler
** (c) Copyright 2002-2005 Hewlett-Packard Company
**
** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>		/* hweight64() */

#include <asm/delay.h>		/* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h>		/* PAGE_OFFSET */
#include <asm/dma.h>
#include <asm/system.h>		/* wmb() */

#include <asm/acpi-ext.h>

#define PFX "IOC: "

/*
** Enable timing of searches of the pdir resource map.  Output in /proc.
** Disabled by default to optimize performance.
*/
#undef PDIR_SEARCH_TIMING

/*
** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
** not defined, all DMA will be 32bit and go through the TLB.
** There's potentially a conflict in the bio merge code with us
** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
** appears to give more performance than bio-level virtual merging, we'll
** do the former for now.  NOTE: ALLOW_IOV_BYPASS_SG also needs to be
** undef'd to completely restrict DMA to the IOMMU.
*/
#define ALLOW_IOV_BYPASS

/*
** This option specifically allows/disallows bypassing scatterlists with
** multiple entries.  Coalescing these entries can allow better DMA streaming
** and in some cases shows better performance than entirely bypassing the
** IOMMU.  Performance increase on the order of 1-2% sequential output/input
** using bonnie++ on a RAID0 MD device (sym2 & mpt).
*/
#undef ALLOW_IOV_BYPASS_SG

/*
** If a device prefetches beyond the end of a valid pdir entry, it will cause
** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
** disconnect on 4k boundaries and prevent such issues.  If the device is
** particularly aggressive, this option will keep the entire pdir valid such
** that prefetching will hit a valid address.  This could severely impact
** error containment, and is therefore off by default.  The page that is
** used for spill-over is poisoned, so that should help debugging somewhat.
*/
#undef FULL_VALID_PDIR

#define ENABLE_MARK_CLEAN

/*
** The number of debug flags is a clue - this code is fragile.  NOTE: since
** tightening the use of res_lock the resource bitmap and actual pdir are no
** longer guaranteed to stay in sync.  The sanity checking code isn't going to
** like that.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS

#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
#endif

#define SBA_INLINE	__inline__
/* #define SBA_INLINE */

#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif


#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...)	printk(x)
#else
#define DBG_BYPASS(x...)
#endif

#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
	if(!(expr)) { \
		printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
		panic(#expr); \
	}
#else
#define ASSERT(expr)
#endif

/*
** The number of pdir entries to "free" before issuing
** a read to PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (ie 4 or 8 entries
** allocated and free'd/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT	64

#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec

#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)

#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */

#define IOC_FUNC_ID	0x000
#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
#define IOC_IBASE	0x300	/* IO TLB */
#define IOC_IMASK	0x308
#define IOC_PCOM	0x310
#define IOC_TCNFG	0x318
#define IOC_PDIR_BASE	0x320

#define IOC_ROPE0_CFG	0x500
#define IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */


/* AGP GART driver looks for this */
#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL

/*
** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
**
** Some IOCs (sx1000) can run at the above page sizes, but are
** really only supported at a 4k page size.
**
** iovp_size could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
static unsigned long iovp_size;
static unsigned long iovp_shift;
static unsigned long iovp_mask;

struct ioc {
	void __iomem	*ioc_hpa;	/* I/O MMU base address */
	char		*res_map;	/* resource map, bit == pdir entry */
	u64		*pdir_base;	/* physical base address */
	unsigned long	ibase;		/* pdir IOV Space base */
	unsigned long	imask;		/* pdir IOV Space mask */

	unsigned long	*res_hint;	/* next avail IOVP - circular search */
	unsigned long	dma_mask;
	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
					/* clearing pdir to prevent races with allocations. */
	unsigned int	res_bitshift;	/* from the RIGHT! */
	unsigned int	res_size;	/* size of resource map in bytes */
#ifdef CONFIG_NUMA
	unsigned int	node;		/* node where this IOC lives */
#endif
#if DELAYED_RESOURCE_CNT > 0
	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
					/* than res_lock for bigger systems. */
	int		saved_cnt;
	struct sba_dma_pair {
		dma_addr_t	iova;
		size_t		size;
	} saved[DELAYED_RESOURCE_CNT];
#endif

#ifdef PDIR_SEARCH_TIMING
#define SBA_SEARCH_SAMPLE	0x100
	unsigned long avg_search[SBA_SEARCH_SAMPLE];
	unsigned long avg_idx;		/* current index into avg_search */
#endif

	/* Stuff we don't need in performance path */
	struct ioc	*next;		/* list of IOC's in system */
	acpi_handle	handle;		/* for multiple IOC's */
	const char	*name;
	unsigned int	func_id;
	unsigned int	rev;		/* HW revision of chip */
	u32		iov_size;
	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
	struct pci_dev	*sac_only_dev;
};

static struct ioc *ioc_list;
static int reserve_sba_gart = 1;

static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);

#define sba_sg_address(sg)	(page_address((sg)->page) + (sg)->offset)

#ifdef FULL_VALID_PDIR
static u64 prefetch_spill_page;
#endif

#ifdef CONFIG_PCI
# define GET_IOC(dev)	(((dev)->bus == &pci_bus_type) \
			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
#else
# define GET_IOC(dev)	NULL
#endif

/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMAs into manageable chunks.
** On parisc, this is more of a software/tuning constraint
** than a HW one.  I/O MMU allocation algorithms can be
** faster with smaller sizes (to some degree).
*/
#define DMA_CHUNK_SIZE  (BITS_PER_LONG*iovp_size)

#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))

/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
**  (ie follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr)       __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)
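
/*
** Illustrative note: because register writes are posted, code that must be
** sure a write has reached the IOC follows it with a read of the same
** register.  The unmap path below does exactly that when purging the IO TLB:
** it writes IOC_PCOM and then issues READ_REG(ioc->ioc_hpa + IOC_PCOM) to
** flush the posted purge.
*/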

#ifdef DEBUG_SBA_INIT

/**
 * sba_dump_tlb - debugging only - print IOMMU operating parameters
 * @hpa: base address of the IOMMU
 *
 * Print the size/location of the IO MMU PDIR.
 */
static void
sba_dump_tlb(char *hpa)
{
	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
	DBG_INIT("\n");
}
#endif


#ifdef ASSERT_PDIR_SANITY

/**
 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 * @pide: pdir index.
 *
 * Print one entry of the IO MMU PDIR in human readable form.
 */
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
	/* start printing from lowest pde in rval */
	u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
	uint rcnt;

	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
		msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);

	rcnt = 0;
	while (rcnt < BITS_PER_LONG) {
		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
		       (rcnt == (pide & (BITS_PER_LONG - 1)))
		       ? "    -->" : "       ",
		       rcnt, ptr, (unsigned long long) *ptr );
		rcnt++;
		ptr++;
	}
	printk(KERN_DEBUG "%s", msg);
}


/**
 * sba_check_pdir - debugging only - consistency checker
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 *
 * Verify that the resource map and pdir state are consistent
 */
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
	u64 *pptr = ioc->pdir_base;		/* pdir ptr */
	uint pide = 0;

	while (rptr < rptr_end) {
		u64 rval;
		int rcnt; /* number of bits we might check */

		rval = *rptr;
		rcnt = 64;

		while (rcnt) {
			/* Get last byte and highest bit from that */
			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
			if ((rval & 0x1) ^ pde)
			{
				/*
				** BUMMER!  -- res_map != pdir --
				** Dump rval and matching pdir entries
				*/
				sba_dump_pdir_entry(ioc, msg, pide);
				return(1);
			}
			rcnt--;
			rval >>= 1;	/* try the next bit */
			pptr++;
			pide++;
		}
		rptr++;	/* look at next word of res_map */
	}
	/* It'd be nice if we always got here :^) */
	return 0;
}


/**
 * sba_dump_sg - debugging only - print Scatter-Gather list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg: head of the SG list
 * @nents: number of entries in SG list
 *
 * print the SG list so we can verify it's correct by hand.
 */
static void
sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	while (nents-- > 0) {
		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
		       startsg->dma_address, startsg->dma_length,
		       sba_sg_address(startsg));
		startsg++;
	}
}

static void
sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	struct scatterlist *the_sg = startsg;
	int the_nents = nents;

	while (the_nents-- > 0) {
		if (sba_sg_address(the_sg) == 0x0UL)
			sba_dump_sg(NULL, startsg, nents);
		the_sg++;
	}
}

#endif /* ASSERT_PDIR_SANITY */




/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */

/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset)	((ioc->ibase) | (iovp) | (offset))
#define SBA_IOVP(ioc,iova)		((iova) & ~(ioc->ibase))

#define PDIR_ENTRY_SIZE	sizeof(u64)

#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)

#define RESMAP_MASK(n)    ~(~0UL << (n))
#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)


/**
 * For most cases the normal get_order is sufficient, however it limits us
 * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
 * It only incurs about 1 clock cycle to use this one with the static variable
 * and makes the code more intuitive.
 */
static SBA_INLINE int
get_iovp_order (unsigned long size)
{
	long double d = size - 1;
	long order;

	order = ia64_getf_exp(d);
	order = order - iovp_shift - 0xffff + 1;
	if (order < 0)
		order = 0;
	return order;
}
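
/*
** Illustrative example (not from the original source): with iovp_shift == 12
** (4KB I/O pages), get_iovp_order(20480) takes the binary exponent of 20479
** (14), so order = 14 - 12 + 1 = 3, i.e. 8 I/O pages -- the smallest
** power-of-two number of I/O pages covering 20KB.  get_iovp_order(4096)
** yields 0 (a single page), just as get_order() would for PAGE_SIZE.
*/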

/**
 * sba_search_bitmap - find free space in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @bits_wanted: number of entries we need.
 * @use_hint: use res_hint to indicate where to start looking
 *
 * Find consecutive free bits in resource bitmap.
 * Each bit represents one entry in the IO Pdir.
 * Cool perf optimization: search for log2(size) bits at a time.
 */
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
{
	unsigned long *res_ptr;
	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
	unsigned long flags, pide = ~0UL;

	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);

	spin_lock_irqsave(&ioc->res_lock, flags);

	/* Allow caller to force a search through the entire resource space */
	if (likely(use_hint)) {
		res_ptr = ioc->res_hint;
	} else {
		res_ptr = (ulong *)ioc->res_map;
		ioc->res_bitshift = 0;
	}
	ASSERT(res_ptr < res_end);

	/*
	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
	 * if a TLB entry is purged while in use.  sba_mark_invalid()
	 * purges IOTLB entries in power-of-two sizes, so we also
	 * allocate IOVA space in power-of-two sizes.
	 */
	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);

	if (likely(bits_wanted == 1)) {
		unsigned int bitshiftcnt;
		for(; res_ptr < res_end ; res_ptr++) {
			if (likely(*res_ptr != ~0UL)) {
				bitshiftcnt = ffz(*res_ptr);
				*res_ptr |= (1UL << bitshiftcnt);
				pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
				pide <<= 3;	/* convert to bit address */
				pide += bitshiftcnt;
				ioc->res_bitshift = bitshiftcnt + bits_wanted;
				goto found_it;
			}
		}
		goto not_found;

	}

	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
		/*
		** Search the resource bit map on well-aligned values.
		** "o" is the alignment.
		** We need the alignment to invalidate I/O TLB using
		** SBA HW features in the unmap path.
		*/
		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
		unsigned long mask, base_mask;

		base_mask = RESMAP_MASK(bits_wanted);
		mask = base_mask << bitshiftcnt;

		DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
		for(; res_ptr < res_end ; res_ptr++)
		{
			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
			ASSERT(0 != mask);
			for (; mask ; mask <<= o, bitshiftcnt += o) {
				if(0 == ((*res_ptr) & mask)) {
					*res_ptr |= mask;     /* mark resources busy! */
					pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
					pide <<= 3;	/* convert to bit address */
					pide += bitshiftcnt;
					ioc->res_bitshift = bitshiftcnt + bits_wanted;
					goto found_it;
				}
			}

			bitshiftcnt = 0;
			mask = base_mask;

		}

	} else {
		int qwords, bits, i;
		unsigned long *end;

		qwords = bits_wanted >> 6; /* /64 */
		bits = bits_wanted - (qwords * BITS_PER_LONG);

		end = res_end - qwords;

		for (; res_ptr < end; res_ptr++) {
			for (i = 0 ; i < qwords ; i++) {
				if (res_ptr[i] != 0)
					goto next_ptr;
			}
			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
				continue;

			/* Found it, mark it */
			for (i = 0 ; i < qwords ; i++)
				res_ptr[i] = ~0UL;
			res_ptr[i] |= RESMAP_MASK(bits);

			pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
			pide <<= 3;	/* convert to bit address */
			res_ptr += qwords;
			ioc->res_bitshift = bits;
			goto found_it;
next_ptr:
			;
		}
	}

not_found:
	prefetch(ioc->res_map);
	ioc->res_hint = (unsigned long *) ioc->res_map;
	ioc->res_bitshift = 0;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);

found_it:
	ioc->res_hint = res_ptr;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);
}
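
/*
** Illustrative example (not from the original source): if sba_search_bitmap()
** finds a free bit at bit 5 of the third unsigned long in res_map (byte
** offset 16), then pide = 16 * 8 + 5 = 133, and the eventual DMA address
** becomes ioc->ibase | (133 << iovp_shift) | offset -- see SBA_IOVA() and
** sba_map_single().
*/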


/**
 * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @size: number of bytes to create a mapping for
 *
 * Given a size, find consecutive unmarked bits in the resource bitmap
 * and then mark them.
 */
static int
sba_alloc_range(struct ioc *ioc, size_t size)
{
	unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
	unsigned long itc_start;
#endif
	unsigned long pide;

	ASSERT(pages_needed);
	ASSERT(0 == (size & ~iovp_mask));

#ifdef PDIR_SEARCH_TIMING
	itc_start = ia64_get_itc();
#endif
	/*
	** "seek and ye shall find"...praying never hurts either...
	*/
	pide = sba_search_bitmap(ioc, pages_needed, 1);
	if (unlikely(pide >= (ioc->res_size << 3))) {
		pide = sba_search_bitmap(ioc, pages_needed, 0);
		if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
			unsigned long flags;

			/*
			** With delayed resource freeing, we can give this one more shot.  We're
			** getting close to being in trouble here, so do what we can to make this
			** one count.
			*/
			spin_lock_irqsave(&ioc->saved_lock, flags);
			if (ioc->saved_cnt > 0) {
				struct sba_dma_pair *d;
				int cnt = ioc->saved_cnt;

				d = &(ioc->saved[ioc->saved_cnt - 1]);

				spin_lock(&ioc->res_lock);
				while (cnt--) {
					sba_mark_invalid(ioc, d->iova, d->size);
					sba_free_range(ioc, d->iova, d->size);
					d--;
				}
				ioc->saved_cnt = 0;
				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
				spin_unlock(&ioc->res_lock);
			}
			spin_unlock_irqrestore(&ioc->saved_lock, flags);

			pide = sba_search_bitmap(ioc, pages_needed, 0);
			if (unlikely(pide >= (ioc->res_size << 3)))
				panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
				      ioc->ioc_hpa);
#else
			panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
			      ioc->ioc_hpa);
#endif
		}
	}

#ifdef PDIR_SEARCH_TIMING
	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
#endif

	prefetchw(&(ioc->pdir_base[pide]));

#ifdef ASSERT_PDIR_SANITY
	/* verify the first enable bit is clear */
	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
	}
#endif

	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
		__FUNCTION__, size, pages_needed, pide,
		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
		ioc->res_bitshift );

	return (pide);
}


/**
 * sba_free_range - unmark bits in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova: IO virtual address which was previously allocated.
 * @size: number of bytes to create a mapping for
 *
 * clear bits in the ioc's resource map
 */
static SBA_INLINE void
sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	unsigned long iovp = SBA_IOVP(ioc, iova);
	unsigned int pide = PDIR_INDEX(iovp);
	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
	int bits_not_wanted = size >> iovp_shift;
	unsigned long m;

	/* Round up to power-of-two size: see AR2305 note above */
	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
	for (; bits_not_wanted > 0 ; res_ptr++) {

		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {

			/* these mappings start 64bit aligned */
			*res_ptr = 0UL;
			bits_not_wanted -= BITS_PER_LONG;
			pide += BITS_PER_LONG;

		} else {

			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));

			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
				bits_not_wanted, m, pide, res_ptr, *res_ptr);

			ASSERT(m != 0);
			ASSERT(bits_not_wanted);
			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
			*res_ptr &= ~m;
			bits_not_wanted = 0;	/* done; zero after the checks above */
		}
	}
}


/**************************************************************
*
*   "Dynamic DMA Mapping" support (aka "Coherent I/O")
*
***************************************************************/

/**
 * sba_io_pdir_entry - fill in one IO PDIR entry
 * @pdir_ptr:  pointer to IO PDIR entry
 * @vba: Virtual CPU address of buffer to map
 *
 * SBA Mapping Routine
 *
 * Given a virtual address (vba, arg1) sba_io_pdir_entry()
 * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
 * Each IO Pdir entry consists of 8 bytes as shown below
 * (LSB == bit 0):
 *
 *  63                    40                                 11    7        0
 * +-+---------------------+----------------------------------+----+--------+
 * |V|        U            |            PPN[39:12]            | U  |   FF   |
 * +-+---------------------+----------------------------------+----+--------+
 *
 *  V  == Valid Bit
 *  U  == Unused
 * PPN == Physical Page Number
 *
 * The physical address fields are filled with the results of virt_to_phys()
 * on the vba.
 */

#if 1
#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL)	\
						      | 0x8000000000000000ULL)
#else
void SBA_INLINE
sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
{
	*pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
}
#endif
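
/*
** Illustrative example (not from the original source): for a region-7
** identity-mapped kernel address vba == 0xe000000004321678, the macro above
** masks off the region bits and the low 12 offset bits, leaving the physical
** page 0x0000000004321000, and sets bit 63 (Valid), producing the pdir
** entry 0x8000000004321000.
*/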

#ifdef ENABLE_MARK_CLEAN
/**
 * Since DMA is i-cache coherent, any (complete) pages that were written via
 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
 * flush them when they get mapped into an executable vm-area.
 */
static void
mark_clean (void *addr, size_t size)
{
	unsigned long pg_addr, end;

	pg_addr = PAGE_ALIGN((unsigned long) addr);
	end = (unsigned long) addr + size;
	while (pg_addr + PAGE_SIZE <= end) {
		struct page *page = virt_to_page((void *)pg_addr);
		set_bit(PG_arch_1, &page->flags);
		pg_addr += PAGE_SIZE;
	}
}
#endif

/**
 * sba_mark_invalid - invalidate one or more IO PDIR entries
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova:  IO Virtual Address mapped earlier
 * @byte_cnt:  number of bytes this mapping covers.
 *
 * Mark the IO PDIR entry(ies) as Invalid and invalidate the
 * corresponding IO TLB entry.  The PCOM (Purge Command Register)
 * is used to purge stale entries in the IO TLB when unmapping.
 *
 * The PCOM register supports purging of multiple pages, with a minimum
 * of 1 page and a maximum of 2GB.  Hardware requires the address be
 * aligned to the size of the range being purged.  The size of the range
 * must be a power of 2.  The "Cool perf optimization" in the
 * allocation routine helps keep that true.
 */
static SBA_INLINE void
sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
{
	u32 iovp = (u32) SBA_IOVP(ioc,iova);

	int off = PDIR_INDEX(iovp);

	/* Must be non-zero and rounded up */
	ASSERT(byte_cnt > 0);
	ASSERT(0 == (byte_cnt & ~iovp_mask));

#ifdef ASSERT_PDIR_SANITY
	/* Assert first pdir entry is set */
	if (!(ioc->pdir_base[off] >> 60)) {
		sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
	}
#endif

	if (byte_cnt <= iovp_size)
	{
		ASSERT(off < ioc->pdir_size);

		iovp |= iovp_shift;     /* set "size" field for PCOM */

#ifndef FULL_VALID_PDIR
		/*
		** clear I/O PDIR entry "valid" bit
		** Do NOT clear the rest - save it for debugging.
		** We should only clear bits that have previously
		** been enabled.
		*/
		ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
		/*
		** If we want to maintain the PDIR as valid, put in
		** the spill page so devices prefetching won't
		** cause a hard fail.
		*/
		ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
	} else {
		u32 t = get_iovp_order(byte_cnt) + iovp_shift;

		iovp |= t;
		ASSERT(t <= 31);   /* 2GB! Max value of "size" field */

		do {
			/* verify this pdir entry is enabled */
			ASSERT(ioc->pdir_base[off]  >> 63);
#ifndef FULL_VALID_PDIR
			/* clear I/O Pdir entry "valid" bit first */
			ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
			ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
			off++;
			byte_cnt -= iovp_size;
		} while (byte_cnt > 0);
	}

	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
}
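
/*
** Illustrative example (not from the original source): purging a 16KB
** mapping with 4KB I/O pages (iovp_shift == 12): get_iovp_order(16384) == 2,
** so the size field written above is 2 + 12 == 14 and the value written to
** IOC_PCOM is ioc->ibase | iovp | 14, i.e. "purge 2^14 bytes starting at
** this IOVP".  A single-page purge encodes iovp_shift itself.
*/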

/**
 * sba_map_single - map one buffer and return IOVA for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @addr:  driver buffer to map.
 * @size:  number of bytes to map in driver buffer.
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
dma_addr_t
sba_map_single(struct device *dev, void *addr, size_t size, int dir)
{
	struct ioc *ioc;
	dma_addr_t iovp;
	dma_addr_t offset;
	u64 *pdir_start;
	int pide;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS
	unsigned long pci_addr = virt_to_phys(addr);
#endif

#ifdef ALLOW_IOV_BYPASS
	ASSERT(to_pci_dev(dev)->dma_mask);
	/*
	** Check if the PCI device can DMA to ptr... if so, just return ptr
	*/
	if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
		/*
		** Device is able to DMA to the buffer directly...
		** just return the PCI address of ptr
		*/
		DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
		           to_pci_dev(dev)->dma_mask, pci_addr);
		return pci_addr;
	}
#endif
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	prefetch(ioc->res_hint);

	ASSERT(size > 0);
	ASSERT(size <= DMA_CHUNK_SIZE);

	/* save offset bits */
	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;

	/* round up to nearest iovp_size */
	size = (size + offset + ~iovp_mask) & iovp_mask;

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_single()"))
		panic("Sanity check failed");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	pide = sba_alloc_range(ioc, size);

	iovp = (dma_addr_t) pide << iovp_shift;

	DBG_RUN("%s() 0x%p -> 0x%lx\n",
		__FUNCTION__, addr, (long) iovp | offset);

	pdir_start = &(ioc->pdir_base[pide]);

	while (size > 0) {
		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
		sba_io_pdir_entry(pdir_start, (unsigned long) addr);

		DBG_RUN("     pdir 0x%p %lx\n", pdir_start, *pdir_start);

		addr += iovp_size;
		size -= iovp_size;
		pdir_start++;
	}
	/* force pdir update */
	wmb();

	/* form complete address */
#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_map_single()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
	return SBA_IOVA(ioc, iovp, offset);
}

#ifdef ENABLE_MARK_CLEAN
static SBA_INLINE void
sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	u32	iovp = (u32) SBA_IOVP(ioc,iova);
	int	off = PDIR_INDEX(iovp);
	void	*addr;

	if (size <= iovp_size) {
		addr = phys_to_virt(ioc->pdir_base[off] &
		                    ~0xE000000000000FFFULL);
		mark_clean(addr, size);
	} else {
		do {
			addr = phys_to_virt(ioc->pdir_base[off] &
			                    ~0xE000000000000FFFULL);
			mark_clean(addr, min(size, iovp_size));
			off++;
			size -= iovp_size;
		} while (size > 0);
	}
}
#endif

/**
 * sba_unmap_single - unmap one IOVA and free resources
 * @dev: instance of PCI owned by the driver that's asking.
 * @iova:  IOVA of driver buffer previously mapped.
 * @size:  number of bytes mapped in driver buffer.
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
{
	struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
	struct sba_dma_pair *d;
#endif
	unsigned long flags;
	dma_addr_t offset;

	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS
	if (likely((iova & ioc->imask) != ioc->ibase)) {
		/*
		** Address does not fall w/in IOVA, must be bypassing
		*/
		DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);

#ifdef ENABLE_MARK_CLEAN
		if (dir == DMA_FROM_DEVICE) {
			mark_clean(phys_to_virt(iova), size);
		}
#endif
		return;
	}
#endif
	offset = iova & ~iovp_mask;

	DBG_RUN("%s() iovp 0x%lx/%x\n",
		__FUNCTION__, (long) iova, size);

	iova ^= offset;        /* clear offset bits */
	size += offset;
	size = ROUNDUP(size, iovp_size);

#ifdef ENABLE_MARK_CLEAN
	if (dir == DMA_FROM_DEVICE)
		sba_mark_clean(ioc, iova, size);
#endif

#if DELAYED_RESOURCE_CNT > 0
	spin_lock_irqsave(&ioc->saved_lock, flags);
	d = &(ioc->saved[ioc->saved_cnt]);
	d->iova = iova;
	d->size = size;
	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
		int cnt = ioc->saved_cnt;
		spin_lock(&ioc->res_lock);
		while (cnt--) {
			sba_mark_invalid(ioc, d->iova, d->size);
			sba_free_range(ioc, d->iova, d->size);
			d--;
		}
		ioc->saved_cnt = 0;
		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
		spin_unlock(&ioc->res_lock);
	}
	spin_unlock_irqrestore(&ioc->saved_lock, flags);
#else /* DELAYED_RESOURCE_CNT == 0 */
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_mark_invalid(ioc, iova, size);
	sba_free_range(ioc, iova, size);
	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif /* DELAYED_RESOURCE_CNT == 0 */
}


/**
 * sba_alloc_coherent - allocate/map shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @dma_handle:  IOVA of new buffer.
 *
 * See Documentation/DMA-mapping.txt
 */
void *
sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
{
	struct ioc *ioc;
	void *addr;

	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef CONFIG_NUMA
	{
		struct page *page;
		page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
		                        numa_node_id() : ioc->node, flags,
		                        get_order(size));

		if (unlikely(!page))
			return NULL;

		addr = page_address(page);
	}
#else
	addr = (void *) __get_free_pages(flags, get_order(size));
#endif
	if (unlikely(!addr))
		return NULL;

	memset(addr, 0, size);
	*dma_handle = virt_to_phys(addr);

#ifdef ALLOW_IOV_BYPASS
	ASSERT(dev->coherent_dma_mask);
	/*
	** Check if the PCI device can DMA to ptr... if so, just return ptr
	*/
	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
		           dev->coherent_dma_mask, *dma_handle);

		return addr;
	}
#endif

	/*
	 * If device can't bypass or bypass is disabled, pass the 32bit fake
	 * device to map single to get an iova mapping.
	 */
	*dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);

	return addr;
}


/**
 * sba_free_coherent - free/unmap shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @vaddr:  virtual address of the "consistent" buffer.
 * @dma_handle:  IO virtual address of the "consistent" buffer.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
{
	sba_unmap_single(dev, dma_handle, size, 0);
	free_pages((unsigned long) vaddr, get_order(size));
}


/*
** Since 0 is a valid pdir_base index value, can't use that
** to determine if a value is valid or not. Use a flag to indicate
** the SG list entry contains a valid pdir index.
*/
#define PIDE_FLAG 0x1UL

#ifdef DEBUG_LARGE_SG_ENTRIES
int dump_run_sg = 0;
#endif


/**
 * sba_fill_pdir - write allocated SG entries into IO PDIR
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * Take preprocessed SG list and write corresponding entries
 * in the IO PDIR.
 */

static SBA_INLINE int
sba_fill_pdir(
	struct ioc *ioc,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
	int n_mappings = 0;
	u64 *pdirp = NULL;
	unsigned long dma_offset = 0;

	dma_sg--;
	while (nents-- > 0) {
		int     cnt = startsg->dma_length;
		startsg->dma_length = 0;

#ifdef DEBUG_LARGE_SG_ENTRIES
		if (dump_run_sg)
			printk(" %2d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#else
		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#endif
		/*
		** Look for the start of a new DMA stream
		*/
		if (startsg->dma_address & PIDE_FLAG) {
			u32 pide = startsg->dma_address & ~PIDE_FLAG;
			dma_offset = (unsigned long) pide & ~iovp_mask;
			startsg->dma_address = 0;
			dma_sg++;
			dma_sg->dma_address = pide | ioc->ibase;
			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
			n_mappings++;
		}

		/*
		** Look for a VCONTIG chunk
		*/
		if (cnt) {
			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
			ASSERT(pdirp);

			/* Since multiple Vcontig blocks could make up
			** one DMA stream, *add* cnt to dma_len.
			*/
			dma_sg->dma_length += cnt;
			cnt += dma_offset;
			dma_offset=0;	/* only want offset on first chunk */
			cnt = ROUNDUP(cnt, iovp_size);
			do {
				sba_io_pdir_entry(pdirp, vaddr);
				vaddr += iovp_size;
				cnt -= iovp_size;
				pdirp++;
			} while (cnt > 0);
		}
		startsg++;
	}
	/* force pdir update */
	wmb();

#ifdef DEBUG_LARGE_SG_ENTRIES
	dump_run_sg = 0;
#endif
	return(n_mappings);
}


/*
** Two address ranges are DMA contiguous *iff* "end of prev" and
** "start of next" are both on an IOV page boundary.
**
** (shift left is a quick trick to mask off upper bits)
*/
#define DMA_CONTIG(__X, __Y) \
	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
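
/*
** Illustrative example (not from the original source): with iovp_shift == 12,
** DMA_CONTIG(0xa0003000, 0xa0008000) is true because both addresses have
** their low 12 bits clear (the OR shifted left by 52 bits is 0), while
** DMA_CONTIG(0xa0003200, 0xa0008000) is false.
*/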


/**
 * sba_coalesce_chunks - preprocess the SG list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * First pass is to walk the SG list and determine where the breaks are
 * in the DMA stream.  Allocates PDIR entries but does not fill them.
 * Returns the number of DMA chunks.
 *
 * Doing the fill separate from the coalescing/allocation keeps the
 * code simpler.  Future enhancement could make one pass through
 * the sglist do both.
 */
static SBA_INLINE int
sba_coalesce_chunks( struct ioc *ioc,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
	unsigned long vcontig_len;         /* len of VCONTIG chunk */
	unsigned long vcontig_end;
	struct scatterlist *dma_sg;        /* next DMA stream head */
	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
	int n_mappings = 0;

	while (nents > 0) {
		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);

		/*
		** Prepare for first/next DMA stream
		*/
		dma_sg = vcontig_sg = startsg;
		dma_len = vcontig_len = vcontig_end = startsg->length;
		vcontig_end +=  vaddr;
		dma_offset = vaddr & ~iovp_mask;

		/* PARANOID: clear entries */
		startsg->dma_address = startsg->dma_length = 0;

		/*
		** This loop terminates one iteration "early" since
		** it's always looking one "ahead".
		*/
		while (--nents > 0) {
			unsigned long vaddr;	/* tmp */

			startsg++;

			/* PARANOID */
			startsg->dma_address = startsg->dma_length = 0;

			/* catch brokenness in SCSI layer */
			ASSERT(startsg->length <= DMA_CHUNK_SIZE);

			/*
			** First make sure current dma stream won't
			** exceed DMA_CHUNK_SIZE if we coalesce the
			** next entry.
			*/
			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
			    > DMA_CHUNK_SIZE)
				break;

			/*
			** Then look for virtually contiguous blocks.
			**
			** append the next transaction?
			*/
			vaddr = (unsigned long) sba_sg_address(startsg);
			if (vcontig_end == vaddr)
			{
				vcontig_len += startsg->length;
				vcontig_end += startsg->length;
				dma_len     += startsg->length;
				continue;
			}

#ifdef DEBUG_LARGE_SG_ENTRIES
			dump_run_sg = (vcontig_len > iovp_size);
#endif

			/*
			** Not virtually contiguous.
			** Terminate prev chunk.
			** Start a new chunk.
			**
			** Once we start a new VCONTIG chunk, dma_offset
			** can't change.  And we need the offset from the first
			** chunk - not the last one.  Ergo successive chunks
			** must start on page boundaries and dovetail
			** with their predecessors.
			*/
			vcontig_sg->dma_length = vcontig_len;

			vcontig_sg = startsg;
			vcontig_len = startsg->length;

			/*
			** 3) do the entries end/start on page boundaries?
			**    Don't update vcontig_end until we've checked.
			*/
			if (DMA_CONTIG(vcontig_end, vaddr))
			{
				vcontig_end = vcontig_len + vaddr;
				dma_len += vcontig_len;
				continue;
			} else {
				break;
			}
		}

		/*
		** End of DMA Stream
		** Terminate last VCONTIG block.
		** Allocate space for DMA stream.
		*/
		vcontig_sg->dma_length = vcontig_len;
		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
		ASSERT(dma_len <= DMA_CHUNK_SIZE);
		dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
			| (sba_alloc_range(ioc, dma_len) << iovp_shift)
			| dma_offset);
		n_mappings++;
	}

	return n_mappings;
}


/**
 * sba_map_sg - map Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
	struct ioc *ioc;
	int coalesced, filled = 0;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS_SG
	struct scatterlist *sg;
#endif

	DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS_SG
	ASSERT(to_pci_dev(dev)->dma_mask);
	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
		for (sg = sglist ; filled < nents ; filled++, sg++){
			sg->dma_length = sg->length;
			sg->dma_address = virt_to_phys(sba_sg_address(sg));
		}
		return filled;
	}
#endif
	/* Fast path single entry scatterlists. */
	if (nents == 1) {
		sglist->dma_length = sglist->length;
		sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
		return 1;
	}

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check before sba_map_sg()");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	prefetch(ioc->res_hint);

	/*
	** First coalesce the chunks and allocate I/O pdir space
	**
	** If this is one DMA stream, we can properly map using the
	** correct virtual address associated with each DMA page.
	** w/o this association, we wouldn't have coherent DMA!
	** Access to the virtual address is what forces a two pass algorithm.
	*/
	coalesced = sba_coalesce_chunks(ioc, sglist, nents);

	/*
	** Program the I/O Pdir
	**
	** map the virtual addresses to the I/O Pdir
	** o dma_address will contain the pdir index
	** o dma_len will contain the number of bytes to map
	** o address contains the virtual address.
	*/
	filled = sba_fill_pdir(ioc, sglist, nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check after sba_map_sg()\n");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	ASSERT(coalesced == filled);
	DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);

	return filled;
}


/**
 * sba_unmap_sg - unmap Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 *
 * See Documentation/DMA-mapping.txt
 */
void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
{
#ifdef ASSERT_PDIR_SANITY
	struct ioc *ioc;
	unsigned long flags;
#endif

	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
		   __FUNCTION__, nents, sba_sg_address(sglist), sglist->length);

#ifdef ASSERT_PDIR_SANITY
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check before sba_unmap_sg()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	while (nents && sglist->dma_length) {

		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
		sglist++;
		nents--;
	}

	DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__,  nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_unmap_sg()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

}

/**************************************************************
*
*   Initialization and claim
*
***************************************************************/

static void __init
ioc_iova_init(struct ioc *ioc)
{
	int tcnfg;
	int agp_found = 0;
	struct pci_dev *device = NULL;
#ifdef FULL_VALID_PDIR
	unsigned long index;
#endif

	/*
	** Firmware programs the base and size of a "safe IOVA space"
	** (one that doesn't overlap memory or LMMIO space) in the
	** IBASE and IMASK registers.
	*/
	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;

	ioc->iov_size = ~ioc->imask + 1;

	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
		__FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
		ioc->iov_size >> 20);

	switch (iovp_size) {
		case  4*1024: tcnfg = 0; break;
		case  8*1024: tcnfg = 1; break;
		case 16*1024: tcnfg = 2; break;
		case 64*1024: tcnfg = 3; break;
		default:
			panic(PFX "Unsupported IOTLB page size %ldK",
				iovp_size >> 10);
			break;
	}
	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);

	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
						   get_order(ioc->pdir_size));
	if (!ioc->pdir_base)
		panic(PFX "Couldn't allocate I/O Page Table\n");

	memset(ioc->pdir_base, 0, ioc->pdir_size);

	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);

	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
	WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);

	/*
	** If an AGP device is present, only use half of the IOV space
	** for PCI DMA.  Unfortunately we can't know ahead of time
	** whether GART support will actually be used, for now we
	** can just key on an AGP device found in the system.
	** We program the next pdir index after we stop w/ a key for
	** the GART code to handshake on.
	*/
	for_each_pci_dev(device)
		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);

	if (agp_found && reserve_sba_gart) {
		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
		ioc->pdir_size /= 2;
		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
	}
#ifdef FULL_VALID_PDIR
	/*
	** Check to see if the spill page has already been allocated; we don't
	** need more than one across multiple SBAs.
	*/
	if (!prefetch_spill_page) {
		char *spill_poison = "SBAIOMMU POISON";
		int poison_size = 16;
		void *poison_addr, *addr;

		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
		if (!addr)
			panic(PFX "Couldn't allocate PDIR spill page\n");

		poison_addr = addr;
		for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
			memcpy(poison_addr, spill_poison, poison_size);

		prefetch_spill_page = virt_to_phys(addr);

		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
	}
	/*
	** Set all the PDIR entries valid w/ the spill page as the target
	*/
	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
#endif

	/* Clear I/O TLB of any possible entries */
	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
	READ_REG(ioc->ioc_hpa + IOC_PCOM);

	/* Enable IOVA translation */
	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
	READ_REG(ioc->ioc_hpa + IOC_IBASE);
}

static void __init
ioc_resource_init(struct ioc *ioc)
{
	spin_lock_init(&ioc->res_lock);
#if DELAYED_RESOURCE_CNT > 0
	spin_lock_init(&ioc->saved_lock);
#endif

	/* resource map size dictated by pdir_size */
	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
	ioc->res_size >>= 3;  /* convert bit count to byte count */
	DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);

	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
						 get_order(ioc->res_size));
	if (!ioc->res_map)
		panic(PFX "Couldn't allocate resource map\n");

	memset(ioc->res_map, 0, ioc->res_size);
	/* next available IOVP - circular search */
	ioc->res_hint = (unsigned long *) ioc->res_map;

#ifdef ASSERT_PDIR_SANITY
	/* Mark first bit busy - ie no IOVA 0 */
	ioc->res_map[0] = 0x1;
	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
#endif
#ifdef FULL_VALID_PDIR
	/* Mark the last resource used so we don't prefetch beyond IOVA space */
	ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
							      | prefetch_spill_page);
#endif

	DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
		 ioc->res_size, (void *) ioc->res_map);
}
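
/*
** Illustrative sizing example (not from the original source): a 1GB IOV
** space with 4KB I/O pages yields 256K pdir entries, a 2MB pdir
** (PDIR_ENTRY_SIZE == 8 bytes per entry) and, at one resource-map bit per
** entry, a 32KB res_map.
*/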

static void __init
ioc_sac_init(struct ioc *ioc)
{
	struct pci_dev *sac = NULL;
	struct pci_controller *controller = NULL;

	/*
	 * pci_alloc_coherent() must return a DMA address which is
	 * SAC (single address cycle) addressable, so allocate a
	 * pseudo-device to enforce that.
	 */
	sac = kzalloc(sizeof(*sac), GFP_KERNEL);
	if (!sac)
		panic(PFX "Couldn't allocate struct pci_dev");

	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
	if (!controller)
		panic(PFX "Couldn't allocate struct pci_controller");

	controller->iommu = ioc;
	sac->sysdata = controller;
	sac->dma_mask = 0xFFFFFFFFUL;
#ifdef CONFIG_PCI
	sac->dev.bus = &pci_bus_type;
#endif
	ioc->sac_only_dev = sac;
}

static void __init
ioc_zx1_init(struct ioc *ioc)
{
	unsigned long rope_config;
	unsigned int i;

	if (ioc->rev < 0x20)
		panic(PFX "IOC 2.0 or later required for IOMMU support\n");

	/* 38 bit memory controller + extra bit for range displaced by MMIO */
	ioc->dma_mask = (0x1UL << 39) - 1;

	/*
	** Clear ROPE(N)_CONFIG AO bit.
	** Disables "NT Ordering" (~= !"Relaxed Ordering")
	** Overrides bit 1 in DMA Hint Sets.
	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
	*/
	for (i=0; i<(8*8); i+=8) {
		rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
		rope_config &= ~IOC_ROPE_AO;
		WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
	}
}

typedef void (initfunc)(struct ioc *);

struct ioc_iommu {
	u32 func_id;
	char *name;
	initfunc *init;
};

static struct ioc_iommu ioc_iommu_info[] __initdata = {
	{ ZX1_IOC_ID, "zx1", ioc_zx1_init },
	{ ZX2_IOC_ID, "zx2", NULL },
	{ SX1000_IOC_ID, "sx1000", NULL },
	{ SX2000_IOC_ID, "sx2000", NULL },
};

static struct ioc * __init
ioc_init(u64 hpa, void *handle)
{
	struct ioc *ioc;
	struct ioc_iommu *info;

	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
	if (!ioc)
		return NULL;

	ioc->next = ioc_list;
	ioc_list = ioc;

	ioc->handle = handle;
	ioc->ioc_hpa = ioremap(hpa, 0x1000);

	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */

	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
		if (ioc->func_id == info->func_id) {
			ioc->name = info->name;
			if (info->init)
				(info->init)(ioc);
		}
	}

	iovp_size = (1 << iovp_shift);
	iovp_mask = ~(iovp_size - 1);

	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
		PAGE_SIZE >> 10, iovp_size >> 10);

	if (!ioc->name) {
		ioc->name = kmalloc(24, GFP_KERNEL);
		if (ioc->name)
			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
		else
			ioc->name = "Unknown";
	}

	ioc_iova_init(ioc);
	ioc_resource_init(ioc);
	ioc_sac_init(ioc);

	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
		ia64_max_iommu_merge_mask = ~iovp_mask;

	printk(KERN_INFO PFX
		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
		hpa, ioc->iov_size >> 20, ioc->ibase);

	return ioc;
}



/**************************************************************************
**
**   SBA initialization code (HW and SW)
**
**   o identify SBA chip itself
**   o FIXME: initialize DMA hints for reasonable defaults
**
**************************************************************************/

#ifdef CONFIG_PROC_FS
static void *
ioc_start(struct seq_file *s, loff_t *pos)
{
	struct ioc *ioc;
	loff_t n = *pos;

	for (ioc = ioc_list; ioc; ioc = ioc->next)
		if (!n--)
			return ioc;

	return NULL;
}

static void *
ioc_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct ioc *ioc = v;

	++*pos;
	return ioc->next;
}

static void
ioc_stop(struct seq_file *s, void *v)
{
}

static int
ioc_show(struct seq_file *s, void *v)
{
	struct ioc *ioc = v;
	unsigned long *res_ptr = (unsigned long *)ioc->res_map;
	int i, used = 0;

	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
#ifdef CONFIG_NUMA
	if (ioc->node != MAX_NUMNODES)
		seq_printf(s, "NUMA node       : %d\n", ioc->node);
#endif
	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);

	for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
		used += hweight64(*res_ptr);

	seq_printf(s, "PDIR size       : %d entries\n", ioc->pdir_size >> 3);
	seq_printf(s, "PDIR used       : %d entries\n", used);

#ifdef PDIR_SEARCH_TIMING
	{
		unsigned long i = 0, avg = 0, min, max;
		min = max = ioc->avg_search[0];
		for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
			avg += ioc->avg_search[i];
			if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
			if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
		}
		avg /= SBA_SEARCH_SAMPLE;
		seq_printf(s, "Bitmap search   : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
		           min, avg, max);
	}
#endif
#ifndef ALLOW_IOV_BYPASS
	seq_printf(s, "IOVA bypass disabled\n");
#endif
	return 0;
}

static struct seq_operations ioc_seq_ops = {
	.start = ioc_start,
	.next  = ioc_next,
	.stop  = ioc_stop,
	.show  = ioc_show
};

static int
ioc_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &ioc_seq_ops);
}

static const struct file_operations ioc_fops = {
	.open    = ioc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release
};

static void __init
ioc_proc_init(void)
{
	struct proc_dir_entry *dir, *entry;

	dir = proc_mkdir("bus/mckinley", NULL);
	if (!dir)
		return;

	entry = create_proc_entry(ioc_list->name, 0, dir);
	if (entry)
		entry->proc_fops = &ioc_fops;
}
#endif

static void
sba_connect_bus(struct pci_bus *bus)
{
	acpi_handle handle, parent;
	acpi_status status;
	struct ioc *ioc;

	if (!PCI_CONTROLLER(bus))
		panic(PFX "no sysdata on bus %d!\n", bus->number);

	if (PCI_CONTROLLER(bus)->iommu)
		return;

	handle = PCI_CONTROLLER(bus)->acpi_handle;
	if (!handle)
		return;

	/*
	 * The IOC scope encloses PCI root bridges in the ACPI
	 * namespace, so work our way out until we find an IOC we
	 * claimed previously.
	 */
	do {
		for (ioc = ioc_list; ioc; ioc = ioc->next)
			if (ioc->handle == handle) {
				PCI_CONTROLLER(bus)->iommu = ioc;
				return;
			}

		status = acpi_get_parent(handle, &parent);
		handle = parent;
	} while (ACPI_SUCCESS(status));

	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
}

#ifdef CONFIG_NUMA
static void __init
sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
{
	unsigned int node;
	int pxm;

	ioc->node = MAX_NUMNODES;

	pxm = acpi_get_pxm(handle);

	if (pxm < 0)
		return;

	node = pxm_to_node(pxm);

	if (node >= MAX_NUMNODES || !node_online(node))
		return;

	ioc->node = node;
	return;
}
#else
#define sba_map_ioc_to_node(ioc, handle)
#endif

static int __init
acpi_sba_ioc_add(struct acpi_device *device)
{
	struct ioc *ioc;
	acpi_status status;
	u64 hpa, length;
	struct acpi_buffer buffer;
	struct acpi_device_info *dev_info;

	status = hp_acpi_csr_space(device->handle, &hpa, &length);
	if (ACPI_FAILURE(status))
		return 1;

	buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
	status = acpi_get_object_info(device->handle, &buffer);
	if (ACPI_FAILURE(status))
		return 1;
	dev_info = buffer.pointer;

	/*
	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
	 * root bridges, and its CSR space includes the IOC function.
	 */
	if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
		hpa += ZX1_IOC_OFFSET;
		/* zx1 based systems default to kernel page size iommu pages */
		if (!iovp_shift)
			iovp_shift = min(PAGE_SHIFT, 16);
	}
	kfree(dev_info);

	/*
	 * default anything not caught above or specified on cmdline to 4k
	 * iommu page size
	 */
	if (!iovp_shift)
		iovp_shift = 12;

	ioc = ioc_init(hpa, device->handle);
	if (!ioc)
		return 1;

	/* setup NUMA node association */
	sba_map_ioc_to_node(ioc, device->handle);
	return 0;
}

static struct acpi_driver acpi_sba_ioc_driver = {
	.name		= "IOC IOMMU Driver",
	.ids		= "HWP0001,HWP0004",
	.ops		= {
		.add	= acpi_sba_ioc_add,
	},
};

static int __init
sba_init(void)
{
	if (!ia64_platform_is("hpzx1") && !ia64_platform_is("hpzx1_swiotlb"))
		return 0;

	acpi_bus_register_driver(&acpi_sba_ioc_driver);
	if (!ioc_list) {
#ifdef CONFIG_IA64_GENERIC
		extern int swiotlb_late_init_with_default_size (size_t size);

		/*
		 * If we didn't find something sba_iommu can claim, we
		 * need to setup the swiotlb and switch to the dig machvec.
		 */
		if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0)
			panic("Unable to find SBA IOMMU or initialize "
			      "software I/O TLB: Try machvec=dig boot option");
		machvec_init("dig");
#else
		panic("Unable to find SBA IOMMU: Try a generic or DIG kernel");
#endif
		return 0;
	}

#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB)
	/*
	 * hpzx1_swiotlb needs to have a fairly small swiotlb bounce
	 * buffer setup to support devices with smaller DMA masks than
	 * sba_iommu can handle.
	 */
	if (ia64_platform_is("hpzx1_swiotlb")) {
		extern void hwsw_init(void);

		hwsw_init();
	}
#endif

#ifdef CONFIG_PCI
	{
		struct pci_bus *b = NULL;
		while ((b = pci_find_next_bus(b)) != NULL)
			sba_connect_bus(b);
	}
#endif

#ifdef CONFIG_PROC_FS
	ioc_proc_init();
#endif
	return 0;
}

subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */

static int __init
nosbagart(char *str)
{
	reserve_sba_gart = 0;
	return 1;
}

int
sba_dma_supported (struct device *dev, u64 mask)
{
	/* make sure it's at least 32bit capable */
	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
}

int
sba_dma_mapping_error (dma_addr_t dma_addr)
{
	return 0;
}

__setup("nosbagart", nosbagart);

static int __init
sba_page_override(char *str)
{
	unsigned long page_size;

	page_size = memparse(str, &str);
	switch (page_size) {
		case 4096:
		case 8192:
		case 16384:
		case 65536:
			iovp_shift = ffs(page_size) - 1;
			break;
		default:
			printk("%s: unknown/unsupported iommu page size %ld\n",
			       __FUNCTION__, page_size);
	}

	return 1;
}

__setup("sbapagesize=",sba_page_override);

EXPORT_SYMBOL(sba_dma_mapping_error);
EXPORT_SYMBOL(sba_map_single);
EXPORT_SYMBOL(sba_unmap_single);
EXPORT_SYMBOL(sba_map_sg);
EXPORT_SYMBOL(sba_unmap_sg);
EXPORT_SYMBOL(sba_dma_supported);
EXPORT_SYMBOL(sba_alloc_coherent);
EXPORT_SYMBOL(sba_free_coherent);