// SPDX-License-Identifier: GPL-2.0-or-later
/*
** IA64 System Bus Adapter (SBA) I/O MMU manager
**
** (c) Copyright 2002-2005 Alex Williamson
** (c) Copyright 2002-2003 Grant Grundler
** (c) Copyright 2002-2005 Hewlett-Packard Company
**
** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
**
**
**
** This module initializes the IOC (I/O Controller) found on HP
** McKinley machines and their successors.
**
*/

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/bitops.h>		/* hweight64() */
#include <linux/crash_dump.h>
#include <linux/iommu-helper.h>
#include <linux/dma-map-ops.h>
#include <linux/prefetch.h>
#include <linux/swiotlb.h>

#include <asm/delay.h>		/* ia64_get_itc() */
#include <asm/io.h>
#include <asm/page.h>		/* PAGE_OFFSET */
#include <asm/dma.h>

#include <asm/acpi-ext.h>

#define PFX "IOC: "

/*
** Enabling timing search of the pdir resource map.  Output in /proc.
** Disabled by default to optimize performance.
*/
#undef PDIR_SEARCH_TIMING

/*
** This option allows cards capable of 64bit DMA to bypass the IOMMU.  If
** not defined, all DMA will be 32bit and go through the TLB.
** There's potentially a conflict in the bio merge code with us
** advertising an iommu, but then bypassing it.  Since I/O MMU bypassing
** appears to give more performance than bio-level virtual merging, we'll
** do the former for now.  NOTE: BYPASS_SG also needs to be undef'd to
** completely restrict DMA to the IOMMU.
*/
#define ALLOW_IOV_BYPASS

/*
** This option specifically allows/disallows bypassing scatterlists with
** multiple entries.  Coalescing these entries can allow better DMA streaming
** and in some cases shows better performance than entirely bypassing the
** IOMMU.  Performance increase on the order of 1-2% sequential output/input
** using bonnie++ on a RAID0 MD device (sym2 & mpt).
*/
#undef ALLOW_IOV_BYPASS_SG

/*
** If a device prefetches beyond the end of a valid pdir entry, it will cause
** a hard failure, ie. MCA.  Version 3.0 and later of the zx1 LBA should
** disconnect on 4k boundaries and prevent such issues.  If the device is
** particularly aggressive, this option will keep the entire pdir valid such
** that prefetching will hit a valid address.  This could severely impact
** error containment, and is therefore off by default.  The page that is
** used for spill-over is poisoned, so that should help debugging somewhat.
*/
#undef FULL_VALID_PDIR

#define ENABLE_MARK_CLEAN

/*
** The number of debug flags is a clue - this code is fragile.  NOTE: since
** tightening the use of res_lock the resource bitmap and actual pdir are no
** longer guaranteed to stay in sync.  The sanity checking code isn't going to
** like that.
*/
#undef DEBUG_SBA_INIT
#undef DEBUG_SBA_RUN
#undef DEBUG_SBA_RUN_SG
#undef DEBUG_SBA_RESOURCE
#undef ASSERT_PDIR_SANITY
#undef DEBUG_LARGE_SG_ENTRIES
#undef DEBUG_BYPASS

#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
#endif

#define SBA_INLINE	__inline__
/* #define SBA_INLINE */

#ifdef DEBUG_SBA_INIT
#define DBG_INIT(x...)	printk(x)
#else
#define DBG_INIT(x...)
#endif

#ifdef DEBUG_SBA_RUN
#define DBG_RUN(x...)	printk(x)
#else
#define DBG_RUN(x...)
#endif

#ifdef DEBUG_SBA_RUN_SG
#define DBG_RUN_SG(x...)	printk(x)
#else
#define DBG_RUN_SG(x...)
#endif


#ifdef DEBUG_SBA_RESOURCE
#define DBG_RES(x...)	printk(x)
#else
#define DBG_RES(x...)
#endif

#ifdef DEBUG_BYPASS
#define DBG_BYPASS(x...)	printk(x)
#else
#define DBG_BYPASS(x...)
#endif

#ifdef ASSERT_PDIR_SANITY
#define ASSERT(expr) \
	if(!(expr)) { \
		printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
		panic(#expr); \
	}
#else
#define ASSERT(expr)
#endif

/*
** The number of pdir entries to "free" before issuing
** a read to PCOM register to flush out PCOM writes.
** Interacts with allocation granularity (ie 4 or 8 entries
** allocated and free'd/purged at a time might make this
** less interesting).
*/
#define DELAYED_RESOURCE_CNT	64

#define PCI_DEVICE_ID_HP_SX2000_IOC	0x12ec

#define ZX1_IOC_ID	((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
#define ZX2_IOC_ID	((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
#define REO_IOC_ID	((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX1000_IOC_ID	((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
#define SX2000_IOC_ID	((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP)

#define ZX1_IOC_OFFSET	0x1000	/* ACPI reports SBA, we want IOC */

#define IOC_FUNC_ID	0x000
#define IOC_FCLASS	0x008	/* function class, bist, header, rev... */
#define IOC_IBASE	0x300	/* IO TLB */
#define IOC_IMASK	0x308
#define IOC_PCOM	0x310
#define IOC_TCNFG	0x318
#define IOC_PDIR_BASE	0x320

#define IOC_ROPE0_CFG	0x500
#define   IOC_ROPE_AO	  0x10	/* Allow "Relaxed Ordering" */


/* AGP GART driver looks for this */
#define ZX1_SBA_IOMMU_COOKIE	0x0000badbadc0ffeeUL

/*
** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
**
** Some IOCs (sx1000) can run at the above page sizes, but are
** really only supported using the IOC at a 4k page size.
**
** iovp_size could only be greater than PAGE_SIZE if we are
** confident the drivers really only touch the next physical
** page iff that driver instance owns it.
*/
static unsigned long iovp_size;
static unsigned long iovp_shift;
static unsigned long iovp_mask;

struct ioc {
	void __iomem	*ioc_hpa;	/* I/O MMU base address */
	char		*res_map;	/* resource map, bit == pdir entry */
	u64		*pdir_base;	/* physical base address */
	unsigned long	ibase;		/* pdir IOV Space base */
	unsigned long	imask;		/* pdir IOV Space mask */

	unsigned long	*res_hint;	/* next avail IOVP - circular search */
	unsigned long	dma_mask;
	spinlock_t	res_lock;	/* protects the resource bitmap, but must be held when */
					/* clearing pdir to prevent races with allocations. */
	unsigned int	res_bitshift;	/* from the RIGHT! */
	unsigned int	res_size;	/* size of resource map in bytes */
#ifdef CONFIG_NUMA
	unsigned int	node;		/* node where this IOC lives */
#endif
#if DELAYED_RESOURCE_CNT > 0
	spinlock_t	saved_lock;	/* may want to try to get this on a separate cacheline */
					/* than res_lock for bigger systems. */
	int		saved_cnt;
	struct sba_dma_pair {
		dma_addr_t	iova;
		size_t		size;
	} saved[DELAYED_RESOURCE_CNT];
#endif

#ifdef PDIR_SEARCH_TIMING
#define SBA_SEARCH_SAMPLE	0x100
	unsigned long avg_search[SBA_SEARCH_SAMPLE];
	unsigned long avg_idx;	/* current index into avg_search */
#endif

	/* Stuff we don't need in performance path */
	struct ioc	*next;		/* list of IOC's in system */
	acpi_handle	handle;		/* for multiple IOC's */
	const char	*name;
	unsigned int	func_id;
	unsigned int	rev;		/* HW revision of chip */
	u32		iov_size;
	unsigned int	pdir_size;	/* in bytes, determined by IOV Space size */
	struct pci_dev	*sac_only_dev;
};

static struct ioc *ioc_list, *ioc_found;
static int reserve_sba_gart = 1;

static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);

#define sba_sg_address(sg)	sg_virt((sg))

#ifdef FULL_VALID_PDIR
static u64 prefetch_spill_page;
#endif

#define GET_IOC(dev)	((dev_is_pci(dev))						\
			 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)

/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMAs into manageable chunks.
** On parisc, this is more of the software/tuning constraint
** rather than the HW. I/O MMU allocation algorithms can be
** faster with smaller sizes (to some degree).
*/
#define DMA_CHUNK_SIZE	(BITS_PER_LONG*iovp_size)

#define ROUNDUP(x,y)	((x + ((y)-1)) & ~((y)-1))

/************************************
** SBA register read and write support
**
** BE WARNED: register writes are posted.
**  (ie follow writes which must reach HW with a read)
**
*/
#define READ_REG(addr)       __raw_readq(addr)
#define WRITE_REG(val, addr) __raw_writeq(val, addr)
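
/*
** Illustrative note (not a functional change): because register writes are
** posted, a write that must reach the IOC before execution continues is
** followed by a read from the same register block, e.g. the pattern
** ioc_iova_init() uses below to enable translation:
**
**	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
**	READ_REG(ioc->ioc_hpa + IOC_IBASE);
*/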

#ifdef DEBUG_SBA_INIT

/**
 * sba_dump_tlb - debugging only - print IOMMU operating parameters
 * @hpa: base address of the IOMMU
 *
 * Print the size/location of the IO MMU PDIR.
 */
static void
sba_dump_tlb(char *hpa)
{
	DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
	DBG_INIT("IOC_IBASE    : %016lx\n", READ_REG(hpa+IOC_IBASE));
	DBG_INIT("IOC_IMASK    : %016lx\n", READ_REG(hpa+IOC_IMASK));
	DBG_INIT("IOC_TCNFG    : %016lx\n", READ_REG(hpa+IOC_TCNFG));
	DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
	DBG_INIT("\n");
}
#endif


#ifdef ASSERT_PDIR_SANITY

/**
 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 * @pide: pdir index.
 *
 * Print one entry of the IO MMU PDIR in human readable form.
 */
static void
sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
{
	/* start printing from lowest pde in rval */
	u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
	unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
	uint rcnt;

	printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
		 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);

	rcnt = 0;
	while (rcnt < BITS_PER_LONG) {
		printk(KERN_DEBUG "%s %2d %p %016Lx\n",
		       (rcnt == (pide & (BITS_PER_LONG - 1)))
		       ? "    -->" : "       ",
		       rcnt, ptr, (unsigned long long) *ptr );
		rcnt++;
		ptr++;
	}
	printk(KERN_DEBUG "%s", msg);
}


/**
 * sba_check_pdir - debugging only - consistency checker
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @msg: text to print on the output line.
 *
 * Verify the resource map and pdir state is consistent
 */
static int
sba_check_pdir(struct ioc *ioc, char *msg)
{
	u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
	u64 *rptr = (u64 *) ioc->res_map;	/* resource map ptr */
	u64 *pptr = ioc->pdir_base;	/* pdir ptr */
	uint pide = 0;

	while (rptr < rptr_end) {
		u64 rval;
		int rcnt; /* number of bits we might check */

		rval = *rptr;
		rcnt = 64;

		while (rcnt) {
			/* Get last byte and highest bit from that */
			u32 pde = ((u32)((*pptr >> (63)) & 0x1));
			if ((rval & 0x1) ^ pde)
			{
				/*
				** BUMMER!  -- res_map != pdir --
				** Dump rval and matching pdir entries
				*/
				sba_dump_pdir_entry(ioc, msg, pide);
				return(1);
			}
			rcnt--;
			rval >>= 1;	/* try the next bit */
			pptr++;
			pide++;
		}
		rptr++;	/* look at next word of res_map */
	}
	/* It'd be nice if we always got here :^) */
	return 0;
}


/**
 * sba_dump_sg - debugging only - print Scatter-Gather list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg: head of the SG list
 * @nents: number of entries in SG list
 *
 * print the SG list so we can verify it's correct by hand.
 */
static void
sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	while (nents-- > 0) {
		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
		       startsg->dma_address, startsg->dma_length,
		       sba_sg_address(startsg));
		startsg = sg_next(startsg);
	}
}

static void
sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
{
	struct scatterlist *the_sg = startsg;
	int the_nents = nents;

	while (the_nents-- > 0) {
		if (sba_sg_address(the_sg) == 0x0UL)
			sba_dump_sg(NULL, startsg, nents);
		the_sg = sg_next(the_sg);
	}
}

#endif /* ASSERT_PDIR_SANITY */




/**************************************************************
*
*   I/O Pdir Resource Management
*
*   Bits set in the resource map are in use.
*   Each bit can represent a number of pages.
*   LSbs represent lower addresses (IOVA's).
*
***************************************************************/
#define PAGES_PER_RANGE 1	/* could increase this to 4 or 8 if needed */

/* Convert from IOVP to IOVA and vice versa. */
#define SBA_IOVA(ioc,iovp,offset)	((ioc->ibase) | (iovp) | (offset))
#define SBA_IOVP(ioc,iova)		((iova) & ~(ioc->ibase))

#define PDIR_ENTRY_SIZE	sizeof(u64)

#define PDIR_INDEX(iovp)   ((iovp)>>iovp_shift)

#define RESMAP_MASK(n)    ~(~0UL << (n))
#define RESMAP_IDX_MASK   (sizeof(unsigned long) - 1)


/**
 * For most cases the normal get_order is sufficient, however it limits us
 * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
 * It only incurs about 1 clock cycle to use this one with the static variable
 * and makes the code more intuitive.
 */
static SBA_INLINE int
get_iovp_order (unsigned long size)
{
	long double d = size - 1;
	long order;

	order = ia64_getf_exp(d);
	order = order - iovp_shift - 0xffff + 1;
	if (order < 0)
		order = 0;
	return order;
}

static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
				 unsigned int bitshiftcnt)
{
	return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
		+ bitshiftcnt;
}
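
/*
** Worked example (illustrative only, assuming iovp_shift == 12, i.e. 4KB
** IOV pages):
**	get_iovp_order(4096)  == 0	-> spans 1 IOV page
**	get_iovp_order(4097)  == 1	-> rounds up to 2 IOV pages
**	get_iovp_order(16384) == 2	-> spans 4 IOV pages
** i.e. the result is log2 of the smallest power-of-two number of IOV pages
** that covers "size".
*/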

/**
 * sba_search_bitmap - find free space in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @bits_wanted: number of entries we need.
 * @use_hint: use res_hint to indicate where to start looking
 *
 * Find consecutive free bits in resource bitmap.
 * Each bit represents one entry in the IO Pdir.
 * Cool perf optimization: search for log2(size) bits at a time.
 */
static SBA_INLINE unsigned long
sba_search_bitmap(struct ioc *ioc, struct device *dev,
		  unsigned long bits_wanted, int use_hint)
{
	unsigned long *res_ptr;
	unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
	unsigned long flags, pide = ~0UL, tpide;
	unsigned long boundary_size;
	unsigned long shift;
	int ret;

	ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
	ASSERT(res_ptr < res_end);

	boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift);

	BUG_ON(ioc->ibase & ~iovp_mask);
	shift = ioc->ibase >> iovp_shift;

	spin_lock_irqsave(&ioc->res_lock, flags);

	/* Allow caller to force a search through the entire resource space */
	if (likely(use_hint)) {
		res_ptr = ioc->res_hint;
	} else {
		res_ptr = (ulong *)ioc->res_map;
		ioc->res_bitshift = 0;
	}

	/*
	 * N.B.  REO/Grande defect AR2305 can cause TLB fetch timeouts
	 * if a TLB entry is purged while in use.  sba_mark_invalid()
	 * purges IOTLB entries in power-of-two sizes, so we also
	 * allocate IOVA space in power-of-two sizes.
	 */
	bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);

	if (likely(bits_wanted == 1)) {
		unsigned int bitshiftcnt;
		for(; res_ptr < res_end ; res_ptr++) {
			if (likely(*res_ptr != ~0UL)) {
				bitshiftcnt = ffz(*res_ptr);
				*res_ptr |= (1UL << bitshiftcnt);
				pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
				ioc->res_bitshift = bitshiftcnt + bits_wanted;
				goto found_it;
			}
		}
		goto not_found;

	}

	if (likely(bits_wanted <= BITS_PER_LONG/2)) {
		/*
		** Search the resource bit map on well-aligned values.
		** "o" is the alignment.
		** We need the alignment to invalidate I/O TLB using
		** SBA HW features in the unmap path.
		*/
		unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
		uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
		unsigned long mask, base_mask;

		base_mask = RESMAP_MASK(bits_wanted);
		mask = base_mask << bitshiftcnt;

		DBG_RES("%s() o %ld %p", __func__, o, res_ptr);
		for(; res_ptr < res_end ; res_ptr++)
		{
			DBG_RES("    %p %lx %lx\n", res_ptr, mask, *res_ptr);
			ASSERT(0 != mask);
			for (; mask ; mask <<= o, bitshiftcnt += o) {
				tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
				ret = iommu_is_span_boundary(tpide, bits_wanted,
							     shift,
							     boundary_size);
				if ((0 == ((*res_ptr) & mask)) && !ret) {
					*res_ptr |= mask;     /* mark resources busy! */
					pide = tpide;
					ioc->res_bitshift = bitshiftcnt + bits_wanted;
					goto found_it;
				}
			}

			bitshiftcnt = 0;
			mask = base_mask;

		}

	} else {
		int qwords, bits, i;
		unsigned long *end;

		qwords = bits_wanted >> 6; /* /64 */
		bits = bits_wanted - (qwords * BITS_PER_LONG);

		end = res_end - qwords;

		for (; res_ptr < end; res_ptr++) {
			tpide = ptr_to_pide(ioc, res_ptr, 0);
			ret = iommu_is_span_boundary(tpide, bits_wanted,
						     shift, boundary_size);
			if (ret)
				goto next_ptr;
			for (i = 0 ; i < qwords ; i++) {
				if (res_ptr[i] != 0)
					goto next_ptr;
			}
			if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
				continue;

			/* Found it, mark it */
			for (i = 0 ; i < qwords ; i++)
				res_ptr[i] = ~0UL;
			res_ptr[i] |= RESMAP_MASK(bits);

			pide = tpide;
			res_ptr += qwords;
			ioc->res_bitshift = bits;
			goto found_it;
next_ptr:
			;
		}
	}

not_found:
	prefetch(ioc->res_map);
	ioc->res_hint = (unsigned long *) ioc->res_map;
	ioc->res_bitshift = 0;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);

found_it:
	ioc->res_hint = res_ptr;
	spin_unlock_irqrestore(&ioc->res_lock, flags);
	return (pide);
}


/**
 * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @size: number of bytes to create a mapping for
 *
 * Given a size, find consecutive unmarked bits and then mark those bits in
 * the resource bit map.
 */
static int
sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
{
	unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
	unsigned long itc_start;
#endif
	unsigned long pide;

	ASSERT(pages_needed);
	ASSERT(0 == (size & ~iovp_mask));

#ifdef PDIR_SEARCH_TIMING
	itc_start = ia64_get_itc();
#endif
	/*
	** "seek and ye shall find"...praying never hurts either...
	*/
	pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
	if (unlikely(pide >= (ioc->res_size << 3))) {
		pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
		if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
			unsigned long flags;

			/*
			** With delayed resource freeing, we can give this one more shot.  We're
			** getting close to being in trouble here, so do what we can to make this
			** one count.
			*/
			spin_lock_irqsave(&ioc->saved_lock, flags);
			if (ioc->saved_cnt > 0) {
				struct sba_dma_pair *d;
				int cnt = ioc->saved_cnt;

				d = &(ioc->saved[ioc->saved_cnt - 1]);

				spin_lock(&ioc->res_lock);
				while (cnt--) {
					sba_mark_invalid(ioc, d->iova, d->size);
					sba_free_range(ioc, d->iova, d->size);
					d--;
				}
				ioc->saved_cnt = 0;
				READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
				spin_unlock(&ioc->res_lock);
			}
			spin_unlock_irqrestore(&ioc->saved_lock, flags);

			pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
			if (unlikely(pide >= (ioc->res_size << 3))) {
				printk(KERN_WARNING "%s: I/O MMU @ %p is "
				       "out of mapping resources, %u %u %lx\n",
				       __func__, ioc->ioc_hpa, ioc->res_size,
				       pages_needed, dma_get_seg_boundary(dev));
				return -1;
			}
#else
			printk(KERN_WARNING "%s: I/O MMU @ %p is "
			       "out of mapping resources, %u %u %lx\n",
			       __func__, ioc->ioc_hpa, ioc->res_size,
			       pages_needed, dma_get_seg_boundary(dev));
			return -1;
#endif
		}
	}

#ifdef PDIR_SEARCH_TIMING
	ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
	ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
#endif

	prefetchw(&(ioc->pdir_base[pide]));

#ifdef ASSERT_PDIR_SANITY
	/* verify the first enable bit is clear */
	if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
		sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
	}
#endif

	DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
		__func__, size, pages_needed, pide,
		(uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
		ioc->res_bitshift );

	return (pide);
}
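
/*
** Reader's note (illustrative, no functional change): the
** (pide >= (ioc->res_size << 3)) tests in sba_alloc_range() work because
** res_size is in bytes, so res_size << 3 is the number of bits in the
** resource map, i.e. the number of pdir entries it tracks; any pide at or
** beyond that value means sba_search_bitmap() found no free range.
*/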

/**
 * sba_free_range - unmark bits in IO PDIR resource bitmap
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova: IO virtual address which was previously allocated.
 * @size: number of bytes to create a mapping for
 *
 * clear bits in the ioc's resource map
 */
static SBA_INLINE void
sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	unsigned long iovp = SBA_IOVP(ioc, iova);
	unsigned int pide = PDIR_INDEX(iovp);
	unsigned int ridx = pide >> 3;	/* convert bit to byte address */
	unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
	int bits_not_wanted = size >> iovp_shift;
	unsigned long m;

	/* Round up to power-of-two size: see AR2305 note above */
	bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
	for (; bits_not_wanted > 0 ; res_ptr++) {

		if (unlikely(bits_not_wanted > BITS_PER_LONG)) {

			/* these mappings start 64bit aligned */
			*res_ptr = 0UL;
			bits_not_wanted -= BITS_PER_LONG;
			pide += BITS_PER_LONG;

		} else {

			/* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
			m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
			bits_not_wanted = 0;

			DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size,
			        bits_not_wanted, m, pide, res_ptr, *res_ptr);

			ASSERT(m != 0);
			ASSERT(bits_not_wanted);
			ASSERT((*res_ptr & m) == m); /* verify same bits are set */
			*res_ptr &= ~m;
		}
	}
}


/**************************************************************
*
* "Dynamic DMA Mapping" support (aka "Coherent I/O")
*
***************************************************************/

/**
 * sba_io_pdir_entry - fill in one IO PDIR entry
 * @pdir_ptr:  pointer to IO PDIR entry
 * @vba: Virtual CPU address of buffer to map
 *
 * SBA Mapping Routine
 *
 * Given a virtual address (vba, arg1) sba_io_pdir_entry()
 * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
 * Each IO Pdir entry consists of 8 bytes as shown below
 * (LSB == bit 0):
 *
 *  63                    40                                 11    7        0
 * +-+---------------------+----------------------------------+----+--------+
 * |V|        U            |            PPN[39:12]            | U  |   FF   |
 * +-+---------------------+----------------------------------+----+--------+
 *
 *  V  == Valid Bit
 *  U  == Unused
 * PPN == Physical Page Number
 *
 * The physical address fields are filled with the results of virt_to_phys()
 * on the vba.
 */

#if 1
#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL)	\
						      | 0x8000000000000000ULL)
#else
void SBA_INLINE
sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
{
	*pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
}
#endif
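
/*
** Worked example (illustrative only): for an identity-mapped (region 7)
** kernel address such as vba == 0xE000000012345678, the macro above yields
**
**	(0xE000000012345678 & ~0xE000000000000FFF) | 0x8000000000000000
**		== 0x8000000012345000
**
** i.e. the Valid bit (63) set and PPN[39:12] holding physical page 0x12345,
** with the page-offset bits cleared.  The out-of-line variant additionally
** sets the low FF byte, which sba_mark_invalid() clears again on unmap.
*/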

#ifdef ENABLE_MARK_CLEAN
/*
 * Since DMA is i-cache coherent, any (complete) pages that were written via
 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
 * flush them when they get mapped into an executable vm-area.
 */
static void mark_clean(void *addr, size_t size)
{
	struct folio *folio = virt_to_folio(addr);
	ssize_t left = size;
	size_t offset = offset_in_folio(folio, addr);

	if (offset) {
		left -= folio_size(folio) - offset;
		if (left <= 0)
			return;
		folio = folio_next(folio);
	}

	while (left >= folio_size(folio)) {
		left -= folio_size(folio);
		set_bit(PG_arch_1, &folio->flags);
		if (!left)
			break;
		folio = folio_next(folio);
	}
}
#endif

/**
 * sba_mark_invalid - invalidate one or more IO PDIR entries
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @iova:  IO Virtual Address mapped earlier
 * @byte_cnt:  number of bytes this mapping covers.
 *
 * Mark the IO PDIR entry(ies) as Invalid and invalidate
 * the corresponding IO TLB entry. The PCOM (Purge Command Register)
 * is to purge stale entries in the IO TLB when unmapping entries.
 *
 * The PCOM register supports purging of multiple pages, with a minimum
 * of 1 page and a maximum of 2GB. Hardware requires the address be
 * aligned to the size of the range being purged. The size of the range
 * must be a power of 2. The "Cool perf optimization" in the
 * allocation routine helps keep that true.
 */
static SBA_INLINE void
sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
{
	u32 iovp = (u32) SBA_IOVP(ioc,iova);

	int off = PDIR_INDEX(iovp);

	/* Must be non-zero and rounded up */
	ASSERT(byte_cnt > 0);
	ASSERT(0 == (byte_cnt & ~iovp_mask));

#ifdef ASSERT_PDIR_SANITY
	/* Assert first pdir entry is set */
	if (!(ioc->pdir_base[off] >> 60)) {
		sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
	}
#endif

	if (byte_cnt <= iovp_size)
	{
		ASSERT(off < ioc->pdir_size);

		iovp |= iovp_shift;     /* set "size" field for PCOM */

#ifndef FULL_VALID_PDIR
		/*
		** clear I/O PDIR entry "valid" bit
		** Do NOT clear the rest - save it for debugging.
		** We should only clear bits that have previously
		** been enabled.
		*/
		ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
		/*
		** If we want to maintain the PDIR as valid, put in
		** the spill page so devices prefetching won't
		** cause a hard fail.
		*/
		ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
	} else {
		u32 t = get_iovp_order(byte_cnt) + iovp_shift;

		iovp |= t;
		ASSERT(t <= 31);   /* 2GB! Max value of "size" field */

		do {
			/* verify this pdir entry is enabled */
			ASSERT(ioc->pdir_base[off] >> 63);
#ifndef FULL_VALID_PDIR
			/* clear I/O Pdir entry "valid" bit first */
			ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
#else
			ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
#endif
			off++;
			byte_cnt -= iovp_size;
		} while (byte_cnt > 0);
	}

	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
}
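
/*
** Worked example (illustrative only, iovp_shift == 12): purging a single
** 4KB mapping writes (iovp | 12) to PCOM, while purging a 32KB mapping
** (8 IOV pages) writes (iovp | (get_iovp_order(32768) + 12)) == (iovp | 15).
** The low bits of the purge address encode log2 of the purge size, which is
** why allocations are rounded to power-of-two sizes and aligned accordingly.
*/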

/**
 * sba_map_page - map one buffer and return IOVA for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @page: page to map
 * @poff: offset into page
 * @size: number of bytes to map
 * @dir: dma direction
 * @attrs: optional dma attributes
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static dma_addr_t sba_map_page(struct device *dev, struct page *page,
			       unsigned long poff, size_t size,
			       enum dma_data_direction dir,
			       unsigned long attrs)
{
	struct ioc *ioc;
	void *addr = page_address(page) + poff;
	dma_addr_t iovp;
	dma_addr_t offset;
	u64 *pdir_start;
	int pide;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS
	unsigned long pci_addr = virt_to_phys(addr);
#endif

#ifdef ALLOW_IOV_BYPASS
	ASSERT(to_pci_dev(dev)->dma_mask);
	/*
	** Check if the PCI device can DMA to ptr... if so, just return ptr
	*/
	if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
		/*
		** Device is capable of DMA'ing to the buffer...
		** just return the PCI address of ptr
		*/
		DBG_BYPASS("sba_map_page() bypass mask/addr: "
			   "0x%lx/0x%lx\n",
			   to_pci_dev(dev)->dma_mask, pci_addr);
		return pci_addr;
	}
#endif
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	prefetch(ioc->res_hint);

	ASSERT(size > 0);
	ASSERT(size <= DMA_CHUNK_SIZE);

	/* save offset bits */
	offset = ((dma_addr_t) (long) addr) & ~iovp_mask;

	/* round up to nearest iovp_size */
	size = (size + offset + ~iovp_mask) & iovp_mask;

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_page()"))
		panic("Sanity check failed");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	pide = sba_alloc_range(ioc, dev, size);
	if (pide < 0)
		return DMA_MAPPING_ERROR;

	iovp = (dma_addr_t) pide << iovp_shift;

	DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset);

	pdir_start = &(ioc->pdir_base[pide]);

	while (size > 0) {
		ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
		sba_io_pdir_entry(pdir_start, (unsigned long) addr);

		DBG_RUN("     pdir 0x%p %lx\n", pdir_start, *pdir_start);

		addr += iovp_size;
		size -= iovp_size;
		pdir_start++;
	}
	/* force pdir update */
	wmb();

	/* form complete address */
#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_map_page()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
	return SBA_IOVA(ioc, iovp, offset);
}

#ifdef ENABLE_MARK_CLEAN
static SBA_INLINE void
sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
{
	u32 iovp = (u32) SBA_IOVP(ioc, iova);
	int off = PDIR_INDEX(iovp);
	void *addr;

	if (size <= iovp_size) {
		addr = phys_to_virt(ioc->pdir_base[off] &
		                    ~0xE000000000000FFFULL);
		mark_clean(addr, size);
	} else {
		do {
			addr = phys_to_virt(ioc->pdir_base[off] &
			                    ~0xE000000000000FFFULL);
			mark_clean(addr, min(size, iovp_size));
			off++;
			size -= iovp_size;
		} while (size > 0);
	}
}
#endif
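
/*
** Worked example (illustrative only, iovp_shift == 12): mapping 3000 bytes
** that start at offset 0xA34 within a page gives
**
**	offset = 0xA34
**	size   = (3000 + 0xA34 + 0xFFF) & ~0xFFF = 0x2000
**
** so sba_map_page() claims two pdir entries and the returned IOVA is
** ibase | (pide << 12) | 0xA34.
*/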

/**
 * sba_unmap_page - unmap one IOVA and free resources
 * @dev: instance of PCI owned by the driver that's asking.
 * @iova:  IOVA of driver buffer previously mapped.
 * @size:  number of bytes mapped in driver buffer.
 * @dir:  R/W or both.
 * @attrs: optional dma attributes
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size,
			   enum dma_data_direction dir, unsigned long attrs)
{
	struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
	struct sba_dma_pair *d;
#endif
	unsigned long flags;
	dma_addr_t offset;

	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS
	if (likely((iova & ioc->imask) != ioc->ibase)) {
		/*
		** Address does not fall w/in IOVA, must be bypassing
		*/
		DBG_BYPASS("sba_unmap_page() bypass addr: 0x%lx\n",
			   iova);

#ifdef ENABLE_MARK_CLEAN
		if (dir == DMA_FROM_DEVICE) {
			mark_clean(phys_to_virt(iova), size);
		}
#endif
		return;
	}
#endif
	offset = iova & ~iovp_mask;

	DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size);

	iova ^= offset;        /* clear offset bits */
	size += offset;
	size = ROUNDUP(size, iovp_size);

#ifdef ENABLE_MARK_CLEAN
	if (dir == DMA_FROM_DEVICE)
		sba_mark_clean(ioc, iova, size);
#endif

#if DELAYED_RESOURCE_CNT > 0
	spin_lock_irqsave(&ioc->saved_lock, flags);
	d = &(ioc->saved[ioc->saved_cnt]);
	d->iova = iova;
	d->size = size;
	if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
		int cnt = ioc->saved_cnt;
		spin_lock(&ioc->res_lock);
		while (cnt--) {
			sba_mark_invalid(ioc, d->iova, d->size);
			sba_free_range(ioc, d->iova, d->size);
			d--;
		}
		ioc->saved_cnt = 0;
		READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
		spin_unlock(&ioc->res_lock);
	}
	spin_unlock_irqrestore(&ioc->saved_lock, flags);
#else /* DELAYED_RESOURCE_CNT == 0 */
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_mark_invalid(ioc, iova, size);
	sba_free_range(ioc, iova, size);
	READ_REG(ioc->ioc_hpa+IOC_PCOM);	/* flush purges */
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif /* DELAYED_RESOURCE_CNT == 0 */
}

/**
 * sba_alloc_coherent - allocate/map shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @dma_handle:  IOVA of new buffer.
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static void *
sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
		   gfp_t flags, unsigned long attrs)
{
	struct page *page;
	struct ioc *ioc;
	int node = -1;
	void *addr;

	ioc = GET_IOC(dev);
	ASSERT(ioc);
#ifdef CONFIG_NUMA
	node = ioc->node;
#endif

	page = alloc_pages_node(node, flags, get_order(size));
	if (unlikely(!page))
		return NULL;

	addr = page_address(page);
	memset(addr, 0, size);
	*dma_handle = page_to_phys(page);

#ifdef ALLOW_IOV_BYPASS
	ASSERT(dev->coherent_dma_mask);
	/*
	** Check if the PCI device can DMA to ptr... if so, just return ptr
	*/
	if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
		DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
			   dev->coherent_dma_mask, *dma_handle);

		return addr;
	}
#endif

	/*
	 * If device can't bypass or bypass is disabled, pass the 32bit fake
	 * device to map single to get an iova mapping.
	 */
	*dma_handle = sba_map_page(&ioc->sac_only_dev->dev, page, 0, size,
				   DMA_BIDIRECTIONAL, 0);
	if (dma_mapping_error(dev, *dma_handle))
		return NULL;
	return addr;
}


/**
 * sba_free_coherent - free/unmap shared mem for DMA
 * @dev: instance of PCI owned by the driver that's asking.
 * @size:  number of bytes mapped in driver buffer.
 * @vaddr:  virtual address IOVA of "consistent" buffer.
 * @dma_handle:  IO virtual address of "consistent" buffer.
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static void sba_free_coherent(struct device *dev, size_t size, void *vaddr,
			      dma_addr_t dma_handle, unsigned long attrs)
{
	sba_unmap_page(dev, dma_handle, size, 0, 0);
	free_pages((unsigned long) vaddr, get_order(size));
}


/*
** Since 0 is a valid pdir_base index value, can't use that
** to determine if a value is valid or not. Use a flag to indicate
** the SG list entry contains a valid pdir index.
*/
#define PIDE_FLAG 0x1UL

#ifdef DEBUG_LARGE_SG_ENTRIES
int dump_run_sg = 0;
#endif


/**
 * sba_fill_pdir - write allocated SG entries into IO PDIR
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * Take preprocessed SG list and write corresponding entries
 * in the IO PDIR.
 */

static SBA_INLINE int
sba_fill_pdir(
	struct ioc *ioc,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *dma_sg = startsg;	/* pointer to current DMA */
	int n_mappings = 0;
	u64 *pdirp = NULL;
	unsigned long dma_offset = 0;

	while (nents-- > 0) {
		int     cnt = startsg->dma_length;
		startsg->dma_length = 0;

#ifdef DEBUG_LARGE_SG_ENTRIES
		if (dump_run_sg)
			printk(" %2d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#else
		DBG_RUN_SG(" %d : %08lx/%05x %p\n",
				nents, startsg->dma_address, cnt,
				sba_sg_address(startsg));
#endif
		/*
		** Look for the start of a new DMA stream
		*/
		if (startsg->dma_address & PIDE_FLAG) {
			u32 pide = startsg->dma_address & ~PIDE_FLAG;
			dma_offset = (unsigned long) pide & ~iovp_mask;
			startsg->dma_address = 0;
			if (n_mappings)
				dma_sg = sg_next(dma_sg);
			dma_sg->dma_address = pide | ioc->ibase;
			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
			n_mappings++;
		}

		/*
		** Look for a VCONTIG chunk
		*/
		if (cnt) {
			unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
			ASSERT(pdirp);

			/* Since multiple Vcontig blocks could make up
			** one DMA stream, *add* cnt to dma_len.
			*/
			dma_sg->dma_length += cnt;
			cnt += dma_offset;
			dma_offset=0;	/* only want offset on first chunk */
			cnt = ROUNDUP(cnt, iovp_size);
			do {
				sba_io_pdir_entry(pdirp, vaddr);
				vaddr += iovp_size;
				cnt -= iovp_size;
				pdirp++;
			} while (cnt > 0);
		}
		startsg = sg_next(startsg);
	}
	/* force pdir update */
	wmb();

#ifdef DEBUG_LARGE_SG_ENTRIES
	dump_run_sg = 0;
#endif
	return(n_mappings);
}
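
/*
** Illustrative note (no functional change): sba_coalesce_chunks() hands each
** DMA stream to sba_fill_pdir() encoded in dma_address as
**
**	PIDE_FLAG | (pide << iovp_shift) | dma_offset
**
** sba_fill_pdir() strips PIDE_FLAG, takes dma_offset from the bits below
** iovp_shift, and rewrites dma_address as the real IOVA by OR-ing in
** ioc->ibase.
*/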


/*
** Two address ranges are DMA contiguous *iff* "end of prev" and
** "start of next" are both on an IOV page boundary.
**
** (shift left is a quick trick to mask off upper bits)
*/
#define DMA_CONTIG(__X, __Y) \
	(((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)


/**
 * sba_coalesce_chunks - preprocess the SG list
 * @ioc: IO MMU structure which owns the pdir we are interested in.
 * @startsg:  list of IOVA/size pairs
 * @nents: number of entries in startsg list
 *
 * First pass is to walk the SG list and determine where the breaks are
 * in the DMA stream. Allocates PDIR entries but does not fill them.
 * Returns the number of DMA chunks.
 *
 * Doing the fill separate from the coalescing/allocation keeps the
 * code simpler. Future enhancement could make one pass through
 * the sglist do both.
 */
static SBA_INLINE int
sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
	struct scatterlist *startsg,
	int nents)
{
	struct scatterlist *vcontig_sg;    /* VCONTIG chunk head */
	unsigned long vcontig_len;         /* len of VCONTIG chunk */
	unsigned long vcontig_end;
	struct scatterlist *dma_sg;        /* next DMA stream head */
	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
	int n_mappings = 0;
	unsigned int max_seg_size = dma_get_max_seg_size(dev);
	int idx;

	while (nents > 0) {
		unsigned long vaddr = (unsigned long) sba_sg_address(startsg);

		/*
		** Prepare for first/next DMA stream
		*/
		dma_sg = vcontig_sg = startsg;
		dma_len = vcontig_len = vcontig_end = startsg->length;
		vcontig_end +=  vaddr;
		dma_offset = vaddr & ~iovp_mask;

		/* PARANOID: clear entries */
		startsg->dma_address = startsg->dma_length = 0;

		/*
		** This loop terminates one iteration "early" since
		** it's always looking one "ahead".
		*/
		while (--nents > 0) {
			unsigned long vaddr;	/* tmp */

			startsg = sg_next(startsg);

			/* PARANOID */
			startsg->dma_address = startsg->dma_length = 0;

			/* catch brokenness in SCSI layer */
			ASSERT(startsg->length <= DMA_CHUNK_SIZE);

			/*
			** First make sure current dma stream won't
			** exceed DMA_CHUNK_SIZE if we coalesce the
			** next entry.
			*/
			if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
			    > DMA_CHUNK_SIZE)
				break;

			if (dma_len + startsg->length > max_seg_size)
				break;

			/*
			** Then look for virtually contiguous blocks.
			**
			** append the next transaction?
			*/
			vaddr = (unsigned long) sba_sg_address(startsg);
			if (vcontig_end == vaddr)
			{
				vcontig_len += startsg->length;
				vcontig_end += startsg->length;
				dma_len     += startsg->length;
				continue;
			}

#ifdef DEBUG_LARGE_SG_ENTRIES
			dump_run_sg = (vcontig_len > iovp_size);
#endif

			/*
			** Not virtually contiguous.
			** Terminate prev chunk.
			** Start a new chunk.
			**
			** Once we start a new VCONTIG chunk, dma_offset
			** can't change. And we need the offset from the first
			** chunk - not the last one. Ergo, successive chunks
			** must start on page boundaries and dovetail
			** with their predecessor.
			*/
			vcontig_sg->dma_length = vcontig_len;

			vcontig_sg = startsg;
			vcontig_len = startsg->length;

			/*
			** 3) do the entries end/start on page boundaries?
			**    Don't update vcontig_end until we've checked.
			*/
			if (DMA_CONTIG(vcontig_end, vaddr))
			{
				vcontig_end = vcontig_len + vaddr;
				dma_len += vcontig_len;
				continue;
			} else {
				break;
			}
		}

		/*
		** End of DMA Stream
		** Terminate last VCONTIG block.
		** Allocate space for DMA stream.
		*/
		vcontig_sg->dma_length = vcontig_len;
		dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
		ASSERT(dma_len <= DMA_CHUNK_SIZE);
		idx = sba_alloc_range(ioc, dev, dma_len);
		if (idx < 0) {
			dma_sg->dma_length = 0;
			return -1;
		}
		dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift)
						   | dma_offset);
		n_mappings++;
	}

	return n_mappings;
}

static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
			       int nents, enum dma_data_direction dir,
			       unsigned long attrs);
/**
 * sba_map_sg_attrs - map Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 * @attrs: optional dma attributes
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	struct ioc *ioc;
	int coalesced, filled = 0;
#ifdef ASSERT_PDIR_SANITY
	unsigned long flags;
#endif
#ifdef ALLOW_IOV_BYPASS_SG
	struct scatterlist *sg;
#endif

	DBG_RUN_SG("%s() START %d entries\n", __func__, nents);
	ioc = GET_IOC(dev);
	ASSERT(ioc);

#ifdef ALLOW_IOV_BYPASS_SG
	ASSERT(to_pci_dev(dev)->dma_mask);
	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
		for_each_sg(sglist, sg, nents, filled) {
			sg->dma_length = sg->length;
			sg->dma_address = virt_to_phys(sba_sg_address(sg));
		}
		return filled;
	}
#endif
	/* Fast path single entry scatterlists. */
	if (nents == 1) {
		sglist->dma_length = sglist->length;
		sglist->dma_address = sba_map_page(dev, sg_page(sglist),
				sglist->offset, sglist->length, dir, attrs);
		if (dma_mapping_error(dev, sglist->dma_address))
			return -EIO;
		return 1;
	}

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check before sba_map_sg_attrs()");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	prefetch(ioc->res_hint);

	/*
	** First coalesce the chunks and allocate I/O pdir space
	**
	** If this is one DMA stream, we can properly map using the
	** correct virtual address associated with each DMA page.
	** w/o this association, we wouldn't have coherent DMA!
	** Access to the virtual address is what forces a two-pass algorithm.
	*/
	coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
	if (coalesced < 0) {
		sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
		return -ENOMEM;
	}

	/*
	** Program the I/O Pdir
	**
	** map the virtual addresses to the I/O Pdir
	** o dma_address will contain the pdir index
	** o dma_len will contain the number of bytes to map
	** o address contains the virtual address.
	*/
	filled = sba_fill_pdir(ioc, sglist, nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
	{
		sba_dump_sg(ioc, sglist, nents);
		panic("Check after sba_map_sg_attrs()\n");
	}
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	ASSERT(coalesced == filled);
	DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled);

	return filled;
}

/**
 * sba_unmap_sg_attrs - unmap Scatter/Gather list
 * @dev: instance of PCI owned by the driver that's asking.
 * @sglist:  array of buffer/length pairs
 * @nents:  number of entries in list
 * @dir:  R/W or both.
 * @attrs: optional dma attributes
 *
 * See Documentation/core-api/dma-api-howto.rst
 */
static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
			       int nents, enum dma_data_direction dir,
			       unsigned long attrs)
{
#ifdef ASSERT_PDIR_SANITY
	struct ioc *ioc;
	unsigned long flags;
#endif

	DBG_RUN_SG("%s() START %d entries,  %p,%x\n",
		   __func__, nents, sba_sg_address(sglist), sglist->length);

#ifdef ASSERT_PDIR_SANITY
	ioc = GET_IOC(dev);
	ASSERT(ioc);

	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

	while (nents && sglist->dma_length) {

		sba_unmap_page(dev, sglist->dma_address, sglist->dma_length,
			       dir, attrs);
		sglist = sg_next(sglist);
		nents--;
	}

	DBG_RUN_SG("%s() DONE (nents %d)\n", __func__,  nents);

#ifdef ASSERT_PDIR_SANITY
	spin_lock_irqsave(&ioc->res_lock, flags);
	sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()");
	spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif

}

/**************************************************************
*
*   Initialization and claim
*
***************************************************************/

static void
ioc_iova_init(struct ioc *ioc)
{
	int tcnfg;
	int agp_found = 0;
	struct pci_dev *device = NULL;
#ifdef FULL_VALID_PDIR
	unsigned long index;
#endif

	/*
	** Firmware programs the base and size of a "safe IOVA space"
	** (one that doesn't overlap memory or LMMIO space) in the
	** IBASE and IMASK registers.
	*/
	ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
	ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;

	ioc->iov_size = ~ioc->imask + 1;

	DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
		__func__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
		ioc->iov_size >> 20);

	switch (iovp_size) {
		case  4*1024: tcnfg = 0; break;
		case  8*1024: tcnfg = 1; break;
		case 16*1024: tcnfg = 2; break;
		case 64*1024: tcnfg = 3; break;
		default:
			panic(PFX "Unsupported IOTLB page size %ldK",
				iovp_size >> 10);
			break;
	}
	WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);

	ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
	ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
						   get_order(ioc->pdir_size));
	if (!ioc->pdir_base)
		panic(PFX "Couldn't allocate I/O Page Table\n");

	memset(ioc->pdir_base, 0, ioc->pdir_size);

	DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__,
		iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);

	ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
	WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);

	/*
	** If an AGP device is present, only use half of the IOV space
	** for PCI DMA.  Unfortunately we can't know ahead of time
	** whether GART support will actually be used, for now we
	** can just key on an AGP device found in the system.
	** We program the next pdir index after we stop w/ a key for
	** the GART code to handshake on.
	*/
	for_each_pci_dev(device)
		agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);

	if (agp_found && reserve_sba_gart) {
		printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
		      ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
		ioc->pdir_size /= 2;
		((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
	}
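
	/*
	** Worked example for the sizing above (illustrative only): with IMASK
	** reading back as 0xFFFFFFFFC0000000, iov_size = ~imask + 1 =
	** 0x40000000 (1GB).  With 4KB IOV pages that is 256K pdir entries,
	** so pdir_size = 256K * 8 bytes = 2MB, and TCNFG is programmed with 0
	** (4KB translation page size).
	*/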
#ifdef FULL_VALID_PDIR
	/*
	** Check to see if the spill page has been allocated, we don't need more than
	** one across multiple SBAs.
	*/
	if (!prefetch_spill_page) {
		char *spill_poison = "SBAIOMMU POISON";
		int poison_size = 16;
		void *poison_addr, *addr;

		addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
		if (!addr)
			panic(PFX "Couldn't allocate PDIR spill page\n");

		poison_addr = addr;
		for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
			memcpy(poison_addr, spill_poison, poison_size);

		prefetch_spill_page = virt_to_phys(addr);

		DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page);
	}
	/*
	** Set all the PDIR entries valid w/ the spill page as the target
	*/
	for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
		((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
#endif

	/* Clear I/O TLB of any possible entries */
	WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
	READ_REG(ioc->ioc_hpa + IOC_PCOM);

	/* Enable IOVA translation */
	WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
	READ_REG(ioc->ioc_hpa + IOC_IBASE);
}

static void __init
ioc_resource_init(struct ioc *ioc)
{
	spin_lock_init(&ioc->res_lock);
#if DELAYED_RESOURCE_CNT > 0
	spin_lock_init(&ioc->saved_lock);
#endif

	/* resource map size dictated by pdir_size */
	ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
	ioc->res_size >>= 3;  /* convert bit count to byte count */
	DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size);

	ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
						 get_order(ioc->res_size));
	if (!ioc->res_map)
		panic(PFX "Couldn't allocate resource map\n");

	memset(ioc->res_map, 0, ioc->res_size);
	/* next available IOVP - circular search */
	ioc->res_hint = (unsigned long *) ioc->res_map;

#ifdef ASSERT_PDIR_SANITY
	/* Mark first bit busy - ie no IOVA 0 */
	ioc->res_map[0] = 0x1;
	ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
#endif
#ifdef FULL_VALID_PDIR
	/* Mark the last resource used so we don't prefetch beyond IOVA space */
	ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
	ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
							      | prefetch_spill_page);
#endif

	DBG_INIT("%s() res_map %x %p\n", __func__,
		 ioc->res_size, (void *) ioc->res_map);
}
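
/*
** Worked example (illustrative only): continuing the 1GB IOV space above,
** pdir_size / PDIR_ENTRY_SIZE = 256K entries, so res_size = 256K / 8 =
** 32KB of bitmap, one bit per pdir entry.
*/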

static void __init
ioc_sac_init(struct ioc *ioc)
{
	struct pci_dev *sac = NULL;
	struct pci_controller *controller = NULL;

	/*
	 * pci_alloc_coherent() must return a DMA address which is
	 * SAC (single address cycle) addressable, so allocate a
	 * pseudo-device to enforce that.
	 */
	sac = kzalloc(sizeof(*sac), GFP_KERNEL);
	if (!sac)
		panic(PFX "Couldn't allocate struct pci_dev");

	controller = kzalloc(sizeof(*controller), GFP_KERNEL);
	if (!controller)
		panic(PFX "Couldn't allocate struct pci_controller");

	controller->iommu = ioc;
	sac->sysdata = controller;
	sac->dma_mask = 0xFFFFFFFFUL;
	sac->dev.bus = &pci_bus_type;
	ioc->sac_only_dev = sac;
}

static void __init
ioc_zx1_init(struct ioc *ioc)
{
	unsigned long rope_config;
	unsigned int i;

	if (ioc->rev < 0x20)
		panic(PFX "IOC 2.0 or later required for IOMMU support\n");

	/* 38 bit memory controller + extra bit for range displaced by MMIO */
	ioc->dma_mask = (0x1UL << 39) - 1;

	/*
	** Clear ROPE(N)_CONFIG AO bit.
	** Disables "NT Ordering" (~= !"Relaxed Ordering")
	** Overrides bit 1 in DMA Hint Sets.
	** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
	*/
	for (i=0; i<(8*8); i+=8) {
		rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
		rope_config &= ~IOC_ROPE_AO;
		WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
	}
}

typedef void (initfunc)(struct ioc *);

struct ioc_iommu {
	u32 func_id;
	char *name;
	initfunc *init;
};

static struct ioc_iommu ioc_iommu_info[] __initdata = {
	{ ZX1_IOC_ID, "zx1", ioc_zx1_init },
	{ ZX2_IOC_ID, "zx2", NULL },
	{ SX1000_IOC_ID, "sx1000", NULL },
	{ SX2000_IOC_ID, "sx2000", NULL },
};

static void __init ioc_init(unsigned long hpa, struct ioc *ioc)
{
	struct ioc_iommu *info;

	ioc->next = ioc_list;
	ioc_list = ioc;

	ioc->ioc_hpa = ioremap(hpa, 0x1000);

	ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
	ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
	ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL;	/* conservative */

	for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
		if (ioc->func_id == info->func_id) {
			ioc->name = info->name;
			if (info->init)
				(info->init)(ioc);
		}
	}

	iovp_size = (1 << iovp_shift);
	iovp_mask = ~(iovp_size - 1);

	DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__,
		PAGE_SIZE >> 10, iovp_size >> 10);

	if (!ioc->name) {
		ioc->name = kmalloc(24, GFP_KERNEL);
		if (ioc->name)
			sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
				ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
		else
			ioc->name = "Unknown";
	}

	ioc_iova_init(ioc);
	ioc_resource_init(ioc);
	ioc_sac_init(ioc);

	printk(KERN_INFO PFX
		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
		hpa, ioc->iov_size >> 20, ioc->ibase);
}



/**************************************************************************
**
**   SBA initialization code (HW and SW)
**
**   o identify SBA chip itself
**   o FIXME: initialize DMA hints for reasonable defaults
**
**************************************************************************/

#ifdef CONFIG_PROC_FS
static void *
ioc_start(struct seq_file *s, loff_t *pos)
{
	struct ioc *ioc;
	loff_t n = *pos;

	for (ioc = ioc_list; ioc; ioc = ioc->next)
		if (!n--)
			return ioc;

	return NULL;
}

static void *


/**************************************************************************
**
**   SBA initialization code (HW and SW)
**
**   o identify SBA chip itself
**   o FIXME: initialize DMA hints for reasonable defaults
**
**************************************************************************/

#ifdef CONFIG_PROC_FS
static void *
ioc_start(struct seq_file *s, loff_t *pos)
{
	struct ioc *ioc;
	loff_t n = *pos;

	for (ioc = ioc_list; ioc; ioc = ioc->next)
		if (!n--)
			return ioc;

	return NULL;
}

static void *
ioc_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct ioc *ioc = v;

	++*pos;
	return ioc->next;
}

static void
ioc_stop(struct seq_file *s, void *v)
{
}

static int
ioc_show(struct seq_file *s, void *v)
{
	struct ioc *ioc = v;
	unsigned long *res_ptr = (unsigned long *)ioc->res_map;
	int i, used = 0;

	seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
		ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
#ifdef CONFIG_NUMA
	if (ioc->node != NUMA_NO_NODE)
		seq_printf(s, "NUMA node       : %d\n", ioc->node);
#endif
	seq_printf(s, "IOVA size       : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
	seq_printf(s, "IOVA page size  : %ld kb\n", iovp_size/1024);

	for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
		used += hweight64(*res_ptr);

	seq_printf(s, "PDIR size       : %d entries\n", ioc->pdir_size >> 3);
	seq_printf(s, "PDIR used       : %d entries\n", used);

#ifdef PDIR_SEARCH_TIMING
	{
		unsigned long i = 0, avg = 0, min, max;
		min = max = ioc->avg_search[0];
		for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
			avg += ioc->avg_search[i];
			if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
			if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
		}
		avg /= SBA_SEARCH_SAMPLE;
		seq_printf(s, "Bitmap search   : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
			   min, avg, max);
	}
#endif
#ifndef ALLOW_IOV_BYPASS
	seq_printf(s, "IOVA bypass disabled\n");
#endif
	return 0;
}

static const struct seq_operations ioc_seq_ops = {
	.start = ioc_start,
	.next  = ioc_next,
	.stop  = ioc_stop,
	.show  = ioc_show
};

static void __init
ioc_proc_init(void)
{
	struct proc_dir_entry *dir;

	dir = proc_mkdir("bus/mckinley", NULL);
	if (!dir)
		return;

	proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops);
}
#endif
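
/*
** Illustrative /proc output (hypothetical values, format taken from
** ioc_show() above): reading the file created by ioc_proc_init(), e.g.
** /proc/bus/mckinley/zx1, would produce something of the form:
**
**	Hewlett Packard zx1 IOC rev 3.0
**	IOVA size       : 1024 MB
**	IOVA page size  : 4 kb
**	PDIR size       : 262144 entries
**	PDIR used       : 42 entries
*/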

static void
sba_connect_bus(struct pci_bus *bus)
{
	acpi_handle handle, parent;
	acpi_status status;
	struct ioc *ioc;

	if (!PCI_CONTROLLER(bus))
		panic(PFX "no sysdata on bus %d!\n", bus->number);

	if (PCI_CONTROLLER(bus)->iommu)
		return;

	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
	if (!handle)
		return;

	/*
	 * The IOC scope encloses PCI root bridges in the ACPI
	 * namespace, so work our way out until we find an IOC we
	 * claimed previously.
	 */
	do {
		for (ioc = ioc_list; ioc; ioc = ioc->next)
			if (ioc->handle == handle) {
				PCI_CONTROLLER(bus)->iommu = ioc;
				return;
			}

		status = acpi_get_parent(handle, &parent);
		handle = parent;
	} while (ACPI_SUCCESS(status));

	printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
}

static void __init
sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
{
#ifdef CONFIG_NUMA
	unsigned int node;

	node = acpi_get_node(handle);
	if (node != NUMA_NO_NODE && !node_online(node))
		node = NUMA_NO_NODE;

	ioc->node = node;
#endif
}

static void __init acpi_sba_ioc_add(struct ioc *ioc)
{
	acpi_handle handle = ioc->handle;
	acpi_status status;
	u64 hpa, length;
	struct acpi_device_info *adi;

	ioc_found = ioc->next;
	status = hp_acpi_csr_space(handle, &hpa, &length);
	if (ACPI_FAILURE(status))
		goto err;

	status = acpi_get_object_info(handle, &adi);
	if (ACPI_FAILURE(status))
		goto err;

	/*
	 * For HWP0001, only SBA appears in ACPI namespace.  It encloses the PCI
	 * root bridges, and its CSR space includes the IOC function.
	 */
	if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) {
		hpa += ZX1_IOC_OFFSET;
		/* zx1 based systems default to kernel page size iommu pages */
		if (!iovp_shift)
			iovp_shift = min(PAGE_SHIFT, 16);
	}
	kfree(adi);

	/*
	 * default anything not caught above or specified on cmdline to 4k
	 * iommu page size
	 */
	if (!iovp_shift)
		iovp_shift = 12;

	ioc_init(hpa, ioc);
	/* setup NUMA node association */
	sba_map_ioc_to_node(ioc, handle);
	return;

 err:
	kfree(ioc);
}
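
/*
** Worked example of the iovp_shift selection above (assuming no
** "sbapagesize=" override on the command line): on an HWP0001 (zx1) system
** the IOMMU page size follows the kernel page size up to 64KB, so a 16KB
** kernel (PAGE_SHIFT 14) gives iovp_shift = min(14, 16) = 14, while a 64KB
** kernel gives min(16, 16) = 16.  Anything not caught there falls through
** to the 4KB default, iovp_shift = 12.
*/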

static const struct acpi_device_id hp_ioc_iommu_device_ids[] = {
	{"HWP0001", 0},
	{"HWP0004", 0},
	{"", 0},
};

static int acpi_sba_ioc_attach(struct acpi_device *device,
			       const struct acpi_device_id *not_used)
{
	struct ioc *ioc;

	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
	if (!ioc)
		return -ENOMEM;

	ioc->next = ioc_found;
	ioc_found = ioc;
	ioc->handle = device->handle;
	return 1;
}


static struct acpi_scan_handler acpi_sba_ioc_handler = {
	.ids	= hp_ioc_iommu_device_ids,
	.attach	= acpi_sba_ioc_attach,
};

static int __init acpi_sba_ioc_init_acpi(void)
{
	return acpi_scan_add_handler(&acpi_sba_ioc_handler);
}
/* This has to run before acpi_scan_init(). */
arch_initcall(acpi_sba_ioc_init_acpi);

static int sba_dma_supported (struct device *dev, u64 mask)
{
	/* make sure it's at least 32bit capable */
	return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
}

static const struct dma_map_ops sba_dma_ops = {
	.alloc			= sba_alloc_coherent,
	.free			= sba_free_coherent,
	.map_page		= sba_map_page,
	.unmap_page		= sba_unmap_page,
	.map_sg			= sba_map_sg_attrs,
	.unmap_sg		= sba_unmap_sg_attrs,
	.dma_supported		= sba_dma_supported,
	.mmap			= dma_common_mmap,
	.get_sgtable		= dma_common_get_sgtable,
	.alloc_pages		= dma_common_alloc_pages,
	.free_pages		= dma_common_free_pages,
};

static int __init
sba_init(void)
{
	/*
	 * If we are booting a kdump kernel, the sba_iommu will cause devices
	 * that were not shutdown properly to MCA as soon as they are turned
	 * back on.  Our only option for a successful kdump kernel boot is to
	 * use swiotlb.
	 */
	if (is_kdump_kernel())
		return 0;

	/*
	 * ioc_found should be populated by the acpi_sba_ioc_handler's .attach()
	 * routine, but that only happens if acpi_scan_init() has already run.
	 */
	while (ioc_found)
		acpi_sba_ioc_add(ioc_found);

	if (!ioc_list)
		return 0;

	{
		struct pci_bus *b = NULL;
		while ((b = pci_find_next_bus(b)) != NULL)
			sba_connect_bus(b);
	}

	/* no need for swiotlb with the iommu */
	swiotlb_exit();
	dma_ops = &sba_dma_ops;

#ifdef CONFIG_PROC_FS
	ioc_proc_init();
#endif
	return 0;
}

subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */

static int __init
nosbagart(char *str)
{
	reserve_sba_gart = 0;
	return 1;
}

__setup("nosbagart", nosbagart);

static int __init
sba_page_override(char *str)
{
	unsigned long page_size;

	page_size = memparse(str, &str);
	switch (page_size) {
	case 4096:
	case 8192:
	case 16384:
	case 65536:
		iovp_shift = ffs(page_size) - 1;
		break;
	default:
		printk("%s: unknown/unsupported iommu page size %ld\n",
		       __func__, page_size);
	}

	return 1;
}

__setup("sbapagesize=",sba_page_override);
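
/*
** Worked example of the "sbapagesize=" handling above (hypothetical command
** line): booting with sbapagesize=64k makes memparse() return 65536, so
** iovp_shift = ffs(65536) - 1 = 17 - 1 = 16, i.e. 64KB IOMMU pages;
** sbapagesize=4096 likewise yields iovp_shift = 12.  Unsupported sizes are
** rejected with a message and leave iovp_shift untouched.
*/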