/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup, new allocation schemes, virtual merging:
 * Copyright (C) 2004 Olof Johansson, IBM Corporation
 *               and  Ben. Herrenschmidt, IBM Corporation
 *
 * Dynamic DMA mapping support, bus-independent parts.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */


#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>

#define DBG(...)

static int novmerge;

static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);

static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);

/*
 * We precalculate the hash to avoid doing it on every allocation.
 *
 * The hash is important to spread CPUs across all the pools. For example,
 * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
 * with 4 pools all primary threads would map to the same pool.
 */
static int __init setup_iommu_pool_hash(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);

	return 0;
}
subsys_initcall(setup_iommu_pool_hash);

#ifdef CONFIG_FAIL_IOMMU

static DECLARE_FAULT_ATTR(fail_iommu);

static int __init setup_fail_iommu(char *str)
{
	return setup_fault_attr(&fail_iommu, str);
}
__setup("fail_iommu=", setup_fail_iommu);

static bool should_fail_iommu(struct device *dev)
{
	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
}

static int __init fail_iommu_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
						       NULL, &fail_iommu);

	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
}
late_initcall(fail_iommu_debugfs);

static ssize_t fail_iommu_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
}

static ssize_t fail_iommu_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev->archdata.fail_iommu = (i == 0) ? 0 : 1;

	return count;
}

static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
		   fail_iommu_store);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (device_create_file(dev, &dev_attr_fail_iommu))
			pr_warn("Unable to create IOMMU fault injection sysfs "
				"entries\n");
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		device_remove_file(dev, &dev_attr_fail_iommu);
	}

	return 0;
}

static struct notifier_block fail_iommu_bus_notifier = {
	.notifier_call = fail_iommu_bus_notify
};

static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
#endif

	return 0;
}
/*
 * Must execute after PCI and VIO subsystem have initialised but before
 * devices are probed.
 */
arch_initcall(fail_iommu_setup);
#else
static inline bool should_fail_iommu(struct device *dev)
{
	return false;
}
#endif

static unsigned long iommu_range_alloc(struct device *dev,
				       struct iommu_table *tbl,
				       unsigned long npages,
				       unsigned long *handle,
				       unsigned long mask,
				       unsigned int align_order)
{
	unsigned long n, end, start;
	unsigned long limit;
	int largealloc = npages > 15;
	int pass = 0;
	unsigned long align_mask;
	unsigned long boundary_size;
	unsigned long flags;
	unsigned int pool_nr;
	struct iommu_pool *pool;

	align_mask = 0xffffffffffffffffl >> (64 - align_order);

	/* This allocator was derived from x86_64's bit string search */

	/* Sanity check */
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return DMA_ERROR_CODE;
	}

	if (should_fail_iommu(dev))
		return DMA_ERROR_CODE;

	/*
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
	pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
	else
		pool = &(tbl->pools[pool_nr]);

	spin_lock_irqsave(&(pool->lock), flags);

again:
	if ((pass == 0) && handle && *handle)
		start = *handle;
	else
		start = pool->hint;

	limit = pool->end;

	/* The case below can happen if we have a small segment appended
	 * to a large, or when the previous alloc was at the very end of
	 * the available space. If so, go back to the initial start.
	 */
	if (start >= limit)
		start = pool->start;

	if (limit + tbl->it_offset > mask) {
		limit = mask - tbl->it_offset + 1;
		/* If we're constrained on address range, first try
		 * at the masked hint to avoid O(n) search complexity,
		 * but on second pass, start at 0 in pool 0.
		 */
		if ((start & mask) >= limit || pass > 0) {
			pool = &(tbl->pools[0]);
			start = pool->start;
		} else {
			start &= mask;
		}
	}

	if (dev)
		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				      1 << IOMMU_PAGE_SHIFT);
	else
		boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
	/* 4GB boundary for iseries_hv_alloc and iseries_hv_map */

	n = iommu_area_alloc(tbl->it_map, limit, start, npages,
			     tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
			     align_mask);
	if (n == -1) {
		if (likely(pass == 0)) {
			/* First try the pool from the start */
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass <= tbl->nr_pools) {
			/* Now try scanning all the other pools */
			spin_unlock(&(pool->lock));
			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
			pool = &tbl->pools[pool_nr];
			spin_lock(&(pool->lock));
			pool->hint = pool->start;
			pass++;
			goto again;

		} else {
			/* Give up */
			spin_unlock_irqrestore(&(pool->lock), flags);
			return DMA_ERROR_CODE;
		}
	}

	end = n + npages;

	/* Bump the hint to a new block for small allocs. */
	if (largealloc) {
		/* Don't bump to new block to avoid fragmentation */
		pool->hint = end;
	} else {
		/* Overflow will be taken care of at the next allocation */
		pool->hint = (end + tbl->it_blocksize - 1) &
			~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	spin_unlock_irqrestore(&(pool->lock), flags);

	return n;
}

static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
			      void *page, unsigned int npages,
			      enum dma_data_direction direction,
			      unsigned long mask, unsigned int align_order,
			      struct dma_attrs *attrs)
{
	unsigned long entry;
	dma_addr_t ret = DMA_ERROR_CODE;
	int build_fail;

	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);

	if (unlikely(entry == DMA_ERROR_CODE))
		return DMA_ERROR_CODE;

	entry += tbl->it_offset;	/* Offset into real TCE table */
	ret = entry << IOMMU_PAGE_SHIFT;	/* Set the return dma address */

	/* Put the TCEs in the HW table */
	build_fail = ppc_md.tce_build(tbl, entry, npages,
				      (unsigned long)page & IOMMU_PAGE_MASK,
				      direction, attrs);

	/* ppc_md.tce_build() only returns non-zero for transient errors.
	 * Clean up the table bitmap in this case and return
	 * DMA_ERROR_CODE. For all other errors the functionality is
	 * not altered.
	 */
	if (unlikely(build_fail)) {
		__iommu_free(tbl, ret, npages);
		return DMA_ERROR_CODE;
	}

	/* Flush/invalidate TLB caches if necessary */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();

	return ret;
}

static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
			     unsigned int npages)
{
	unsigned long entry, free_entry;

	entry = dma_addr >> IOMMU_PAGE_SHIFT;
	free_entry = entry - tbl->it_offset;

	if (((free_entry + npages) > tbl->it_size) ||
	    (entry < tbl->it_offset)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "iommu_free: invalid entry\n");
			printk(KERN_INFO "\tentry    = 0x%lx\n", entry);
			printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
			printk(KERN_INFO "\tTable    = 0x%llx\n", (u64)tbl);
			printk(KERN_INFO "\tbus#     = 0x%llx\n", (u64)tbl->it_busno);
			printk(KERN_INFO "\tsize     = 0x%llx\n", (u64)tbl->it_size);
			printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
			printk(KERN_INFO "\tindex    = 0x%llx\n", (u64)tbl->it_index);
			WARN_ON(1);
		}

		return false;
	}

	return true;
}

static struct iommu_pool *get_pool(struct iommu_table *tbl,
				   unsigned long entry)
{
	struct iommu_pool *p;
	unsigned long largepool_start = tbl->large_pool.start;

	/* The large pool is the last pool at the top of the table */
	if (entry >= largepool_start) {
		p = &tbl->large_pool;
	} else {
		unsigned int pool_nr = entry / tbl->poolsize;

		BUG_ON(pool_nr > tbl->nr_pools);
		p = &tbl->pools[pool_nr];
	}

	return p;
}

static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
			 unsigned int npages)
{
	unsigned long entry, free_entry;
	unsigned long flags;
	struct iommu_pool *pool;

	entry = dma_addr >> IOMMU_PAGE_SHIFT;
	free_entry = entry - tbl->it_offset;

	pool = get_pool(tbl, free_entry);

	if (!iommu_free_check(tbl, dma_addr, npages))
		return;

	ppc_md.tce_free(tbl, entry, npages);

	spin_lock_irqsave(&(pool->lock), flags);
	bitmap_clear(tbl->it_map, free_entry, npages);
	spin_unlock_irqrestore(&(pool->lock), flags);
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
		       unsigned int npages)
{
	__iommu_free(tbl, dma_addr, npages);

	/* Make sure TLB cache is flushed if the HW needs it. We do
	 * not do an mb() here on purpose, it is not needed on any of
	 * the current platforms.
	 */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);
}

int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
		 struct scatterlist *sglist, int nelems,
		 unsigned long mask, enum dma_data_direction direction,
		 struct dma_attrs *attrs)
{
	dma_addr_t dma_next = 0, dma_addr;
	struct scatterlist *s, *outs, *segstart;
	int outcount, incount, i, build_fail = 0;
	unsigned int align;
	unsigned long handle;
	unsigned int max_seg_size;

	BUG_ON(direction == DMA_NONE);

	if ((nelems == 0) || !tbl)
		return 0;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	DBG("sg mapping %d elements:\n", nelems);

	max_seg_size = dma_get_max_seg_size(dev);
	for_each_sg(sglist, s, nelems, i) {
		unsigned long vaddr, npages, entry, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);
		align = 0;
		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&
		    (vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
		entry = iommu_range_alloc(dev, tbl, npages, &handle,
					  mask >> IOMMU_PAGE_SHIFT, align);

		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);

		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %lx npages %lu\n", tbl, vaddr,
					 npages);
			goto failure;
		}

		/* Convert entry to a dma_addr_t */
		entry += tbl->it_offset;
		dma_addr = entry << IOMMU_PAGE_SHIFT;
		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK);

		DBG("  - %lu pages, entry: %lx, dma_addr: %lx\n",
			    npages, entry, dma_addr);

		/* Insert into HW table */
		build_fail = ppc_md.tce_build(tbl, entry, npages,
					      vaddr & IOMMU_PAGE_MASK,
					      direction, attrs);
		if (unlikely(build_fail))
			goto failure;

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			DBG("  - trying merge...\n");
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if (novmerge || (dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size)) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
				DBG("    can't merge, new segment.\n");
			} else {
				outs->dma_length += s->length;
				DBG("    merged, new len: %ux\n", outs->dma_length);
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			DBG("  - filling new segment.\n");
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;

		DBG("  - dma next is: %lx\n", dma_next);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);

	DBG("mapped %d elements:\n", outcount);

	/* For the sake of iommu_unmap_sg, we clear out the length in the
	 * next entry of the sglist if we didn't fill the list completely
	 */
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	/* Make sure updates are seen by hardware */
	mb();

	return outcount;

 failure:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IOMMU_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IOMMU_PAGE_SIZE);
			__iommu_free(tbl, vaddr, npages);
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	return 0;
}


void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
		    int nelems, enum dma_data_direction direction,
		    struct dma_attrs *attrs)
{
	struct scatterlist *sg;

	BUG_ON(direction == DMA_NONE);

	if (!tbl)
		return;

	sg = sglist;
	while (nelems--) {
		unsigned int npages;
		dma_addr_t dma_handle = sg->dma_address;

		if (sg->dma_length == 0)
			break;
		npages = iommu_num_pages(dma_handle, sg->dma_length,
					 IOMMU_PAGE_SIZE);
		__iommu_free(tbl, dma_handle, npages);
		sg = sg_next(sg);
	}

	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
	 * do not do an mb() here, the affected platforms do not need it
	 * when freeing.
	 */
	if (ppc_md.tce_flush)
		ppc_md.tce_flush(tbl);
}

static void iommu_table_clear(struct iommu_table *tbl)
{
	/*
	 * In case of firmware assisted dump system goes through clean
	 * reboot process at the time of system crash. Hence it's safe to
	 * clear the TCE entries if firmware assisted dump is active.
	 */
	if (!is_kdump_kernel() || is_fadump_active()) {
		/* Clear the table in case firmware left allocations in it */
		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (ppc_md.tce_get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel. */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
			/*
			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			printk(KERN_WARNING "TCE table is full; freeing ");
			printk(KERN_WARNING "%d entries for the kdump boot\n",
				KDUMP_MIN_TCE_ENTRIES);
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
				index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}

/*
 * Build a iommu_table structure.  This contains a bit map which
 * is used to manage allocation of the tce space.
 */
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
{
	unsigned long sz;
	static int welcomed = 0;
	struct page *page;
	unsigned int i;
	struct iommu_pool *p;

	/* number of bytes needed for the bitmap */
	sz = (tbl->it_size + 7) >> 3;

	page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
	if (!page)
		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
	tbl->it_map = page_address(page);
	memset(tbl->it_map, 0, sz);

	/*
	 * Reserve page 0 so it will not be used for any mappings.
	 * This avoids buggy drivers that consider page 0 to be invalid
	 * to crash the machine or even lose data.
	 */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	/* We only split the IOMMU table if we have 1GB or more of space */
	if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
		tbl->nr_pools = IOMMU_NR_POOLS;
	else
		tbl->nr_pools = 1;

	/* We reserve the top 1/4 of the table for large allocations */
	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;

	for (i = 0; i < tbl->nr_pools; i++) {
		p = &tbl->pools[i];
		spin_lock_init(&(p->lock));
		p->start = tbl->poolsize * i;
		p->hint = p->start;
		p->end = p->start + tbl->poolsize;
	}

	p = &tbl->large_pool;
	spin_lock_init(&(p->lock));
	p->start = tbl->poolsize * i;
	p->hint = p->start;
	p->end = tbl->it_size;

	iommu_table_clear(tbl);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	return tbl;
}

void iommu_free_table(struct iommu_table *tbl, const char *node_name)
{
	unsigned long bitmap_sz, i;
	unsigned int order;

	if (!tbl || !tbl->it_map) {
		printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
				node_name);
		return;
	}

	/* verify that table contains no entries */
	/* it_size is in entries, and we're examining 64 at a time */
	for (i = 0; i < (tbl->it_size/64); i++) {
		if (tbl->it_map[i] != 0) {
			printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
				__func__, node_name);
			break;
		}
	}

	/* calculate bitmap size in bytes */
	bitmap_sz = (tbl->it_size + 7) / 8;

	/* free bitmap */
	order = get_order(bitmap_sz);
	free_pages((unsigned long) tbl->it_map, order);

	/* free table */
	kfree(tbl);
}

/* Creates TCEs for a user provided buffer.  The user buffer must be
 * contiguous real kernel storage (not vmalloc).  The address passed here
 * comprises a page address and offset into that page. The dma_addr_t
 * returned will point to the same byte within the page as was passed in.
 */
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
			  struct page *page, unsigned long offset, size_t size,
			  unsigned long mask, enum dma_data_direction direction,
			  struct dma_attrs *attrs)
{
	dma_addr_t dma_handle = DMA_ERROR_CODE;
	void *vaddr;
	unsigned long uaddr;
	unsigned int npages, align;

	BUG_ON(direction == DMA_NONE);

	vaddr = page_address(page) + offset;
	uaddr = (unsigned long)vaddr;
	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);

	if (tbl) {
		align = 0;
		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE &&
		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;

		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
					 mask >> IOMMU_PAGE_SHIFT, align,
					 attrs);
		if (dma_handle == DMA_ERROR_CODE) {
			if (printk_ratelimit()) {
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %p npages %d\n", tbl, vaddr,
					 npages);
			}
		} else
			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK);
	}

	return dma_handle;
}

void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
		      size_t size, enum dma_data_direction direction,
		      struct dma_attrs *attrs)
{
	unsigned int npages;

	BUG_ON(direction == DMA_NONE);

	if (tbl) {
		npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);
		iommu_free(tbl, dma_handle, npages);
	}
}

/* Allocates a contiguous real buffer and creates mappings over it.
 * Returns the virtual address of the buffer and sets dma_handle
 * to the dma address (mapping) of the first page.
 */
void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
			   size_t size, dma_addr_t *dma_handle,
			   unsigned long mask, gfp_t flag, int node)
{
	void *ret = NULL;
	dma_addr_t mapping;
	unsigned int order;
	unsigned int nio_pages, io_order;
	struct page *page;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Client asked for way too much space.  This is checked later
	 * anyway.  It is easier to debug here for the drivers than in
	 * the tce tables.
	 */
	if (order >= IOMAP_MAX_ORDER) {
		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
			 size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	page = alloc_pages_node(node, flag, order);
	if (!page)
		return NULL;
	ret = page_address(page);
	memset(ret, 0, size);

	/* Set up tces to cover the allocated range */
	nio_pages = size >> IOMMU_PAGE_SHIFT;
	io_order = get_iommu_order(size);
	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
			      mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
	if (mapping == DMA_ERROR_CODE) {
		free_pages((unsigned long)ret, order);
		return NULL;
	}
	*dma_handle = mapping;
	return ret;
}

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		unsigned int nio_pages;

		size = PAGE_ALIGN(size);
		nio_pages = size >> IOMMU_PAGE_SHIFT;
		iommu_free(tbl, dma_handle, nio_pages);
		size = PAGE_ALIGN(size);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}