1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2006-2009, Intel Corporation. 4 * 5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 6 */ 7 8 #include <linux/iova.h> 9 #include <linux/module.h> 10 #include <linux/slab.h> 11 #include <linux/smp.h> 12 #include <linux/bitops.h> 13 #include <linux/cpu.h> 14 15 /* The anchor node sits above the top of the usable address space */ 16 #define IOVA_ANCHOR ~0UL 17 18 static bool iova_rcache_insert(struct iova_domain *iovad, 19 unsigned long pfn, 20 unsigned long size); 21 static unsigned long iova_rcache_get(struct iova_domain *iovad, 22 unsigned long size, 23 unsigned long limit_pfn); 24 static void init_iova_rcaches(struct iova_domain *iovad); 25 static void free_iova_rcaches(struct iova_domain *iovad); 26 static void fq_destroy_all_entries(struct iova_domain *iovad); 27 static void fq_flush_timeout(struct timer_list *t); 28 29 void 30 init_iova_domain(struct iova_domain *iovad, unsigned long granule, 31 unsigned long start_pfn) 32 { 33 /* 34 * IOVA granularity will normally be equal to the smallest 35 * supported IOMMU page size; both *must* be capable of 36 * representing individual CPU pages exactly. 37 */ 38 BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule)); 39 40 spin_lock_init(&iovad->iova_rbtree_lock); 41 iovad->rbroot = RB_ROOT; 42 iovad->cached_node = &iovad->anchor.node; 43 iovad->cached32_node = &iovad->anchor.node; 44 iovad->granule = granule; 45 iovad->start_pfn = start_pfn; 46 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); 47 iovad->max32_alloc_size = iovad->dma_32bit_pfn; 48 iovad->flush_cb = NULL; 49 iovad->fq = NULL; 50 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; 51 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); 52 rb_insert_color(&iovad->anchor.node, &iovad->rbroot); 53 init_iova_rcaches(iovad); 54 } 55 EXPORT_SYMBOL_GPL(init_iova_domain); 56 57 bool has_iova_flush_queue(struct iova_domain *iovad) 58 { 59 return !!iovad->fq; 60 } 61 62 static void free_iova_flush_queue(struct iova_domain *iovad) 63 { 64 if (!has_iova_flush_queue(iovad)) 65 return; 66 67 if (timer_pending(&iovad->fq_timer)) 68 del_timer(&iovad->fq_timer); 69 70 fq_destroy_all_entries(iovad); 71 72 free_percpu(iovad->fq); 73 74 iovad->fq = NULL; 75 iovad->flush_cb = NULL; 76 iovad->entry_dtor = NULL; 77 } 78 79 int init_iova_flush_queue(struct iova_domain *iovad, 80 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) 81 { 82 struct iova_fq __percpu *queue; 83 int cpu; 84 85 atomic64_set(&iovad->fq_flush_start_cnt, 0); 86 atomic64_set(&iovad->fq_flush_finish_cnt, 0); 87 88 queue = alloc_percpu(struct iova_fq); 89 if (!queue) 90 return -ENOMEM; 91 92 iovad->flush_cb = flush_cb; 93 iovad->entry_dtor = entry_dtor; 94 95 for_each_possible_cpu(cpu) { 96 struct iova_fq *fq; 97 98 fq = per_cpu_ptr(queue, cpu); 99 fq->head = 0; 100 fq->tail = 0; 101 102 spin_lock_init(&fq->lock); 103 } 104 105 smp_wmb(); 106 107 iovad->fq = queue; 108 109 timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); 110 atomic_set(&iovad->fq_timer_on, 0); 111 112 return 0; 113 } 114 EXPORT_SYMBOL_GPL(init_iova_flush_queue); 115 116 static struct rb_node * 117 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) 118 { 119 if (limit_pfn <= iovad->dma_32bit_pfn) 120 return iovad->cached32_node; 121 122 return iovad->cached_node; 123 } 124 125 static void 126 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) 127 { 128 if (new->pfn_hi < iovad->dma_32bit_pfn) 129 iovad->cached32_node = &new->node; 130 else 131 iovad->cached_node = &new->node; 132 } 133 134 static void 135 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) 136 { 137 struct iova *cached_iova; 138 139 cached_iova = rb_entry(iovad->cached32_node, struct iova, node); 140 if (free == cached_iova || 141 (free->pfn_hi < iovad->dma_32bit_pfn && 142 free->pfn_lo >= cached_iova->pfn_lo)) { 143 iovad->cached32_node = rb_next(&free->node); 144 iovad->max32_alloc_size = iovad->dma_32bit_pfn; 145 } 146 147 cached_iova = rb_entry(iovad->cached_node, struct iova, node); 148 if (free->pfn_lo >= cached_iova->pfn_lo) 149 iovad->cached_node = rb_next(&free->node); 150 } 151 152 /* Insert the iova into domain rbtree by holding writer lock */ 153 static void 154 iova_insert_rbtree(struct rb_root *root, struct iova *iova, 155 struct rb_node *start) 156 { 157 struct rb_node **new, *parent = NULL; 158 159 new = (start) ? &start : &(root->rb_node); 160 /* Figure out where to put new node */ 161 while (*new) { 162 struct iova *this = rb_entry(*new, struct iova, node); 163 164 parent = *new; 165 166 if (iova->pfn_lo < this->pfn_lo) 167 new = &((*new)->rb_left); 168 else if (iova->pfn_lo > this->pfn_lo) 169 new = &((*new)->rb_right); 170 else { 171 WARN_ON(1); /* this should not happen */ 172 return; 173 } 174 } 175 /* Add new node and rebalance tree. */ 176 rb_link_node(&iova->node, parent, new); 177 rb_insert_color(&iova->node, root); 178 } 179 180 static int __alloc_and_insert_iova_range(struct iova_domain *iovad, 181 unsigned long size, unsigned long limit_pfn, 182 struct iova *new, bool size_aligned) 183 { 184 struct rb_node *curr, *prev; 185 struct iova *curr_iova; 186 unsigned long flags; 187 unsigned long new_pfn; 188 unsigned long align_mask = ~0UL; 189 190 if (size_aligned) 191 align_mask <<= fls_long(size - 1); 192 193 /* Walk the tree backwards */ 194 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 195 if (limit_pfn <= iovad->dma_32bit_pfn && 196 size >= iovad->max32_alloc_size) 197 goto iova32_full; 198 199 curr = __get_cached_rbnode(iovad, limit_pfn); 200 curr_iova = rb_entry(curr, struct iova, node); 201 do { 202 limit_pfn = min(limit_pfn, curr_iova->pfn_lo); 203 new_pfn = (limit_pfn - size) & align_mask; 204 prev = curr; 205 curr = rb_prev(curr); 206 curr_iova = rb_entry(curr, struct iova, node); 207 } while (curr && new_pfn <= curr_iova->pfn_hi); 208 209 if (limit_pfn < size || new_pfn < iovad->start_pfn) { 210 iovad->max32_alloc_size = size; 211 goto iova32_full; 212 } 213 214 /* pfn_lo will point to size aligned address if size_aligned is set */ 215 new->pfn_lo = new_pfn; 216 new->pfn_hi = new->pfn_lo + size - 1; 217 218 /* If we have 'prev', it's a valid place to start the insertion. */ 219 iova_insert_rbtree(&iovad->rbroot, new, prev); 220 __cached_rbnode_insert_update(iovad, new); 221 222 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 223 return 0; 224 225 iova32_full: 226 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 227 return -ENOMEM; 228 } 229 230 static struct kmem_cache *iova_cache; 231 static unsigned int iova_cache_users; 232 static DEFINE_MUTEX(iova_cache_mutex); 233 234 struct iova *alloc_iova_mem(void) 235 { 236 return kmem_cache_alloc(iova_cache, GFP_ATOMIC); 237 } 238 EXPORT_SYMBOL(alloc_iova_mem); 239 240 void free_iova_mem(struct iova *iova) 241 { 242 if (iova->pfn_lo != IOVA_ANCHOR) 243 kmem_cache_free(iova_cache, iova); 244 } 245 EXPORT_SYMBOL(free_iova_mem); 246 247 int iova_cache_get(void) 248 { 249 mutex_lock(&iova_cache_mutex); 250 if (!iova_cache_users) { 251 iova_cache = kmem_cache_create( 252 "iommu_iova", sizeof(struct iova), 0, 253 SLAB_HWCACHE_ALIGN, NULL); 254 if (!iova_cache) { 255 mutex_unlock(&iova_cache_mutex); 256 printk(KERN_ERR "Couldn't create iova cache\n"); 257 return -ENOMEM; 258 } 259 } 260 261 iova_cache_users++; 262 mutex_unlock(&iova_cache_mutex); 263 264 return 0; 265 } 266 EXPORT_SYMBOL_GPL(iova_cache_get); 267 268 void iova_cache_put(void) 269 { 270 mutex_lock(&iova_cache_mutex); 271 if (WARN_ON(!iova_cache_users)) { 272 mutex_unlock(&iova_cache_mutex); 273 return; 274 } 275 iova_cache_users--; 276 if (!iova_cache_users) 277 kmem_cache_destroy(iova_cache); 278 mutex_unlock(&iova_cache_mutex); 279 } 280 EXPORT_SYMBOL_GPL(iova_cache_put); 281 282 /** 283 * alloc_iova - allocates an iova 284 * @iovad: - iova domain in question 285 * @size: - size of page frames to allocate 286 * @limit_pfn: - max limit address 287 * @size_aligned: - set if size_aligned address range is required 288 * This function allocates an iova in the range iovad->start_pfn to limit_pfn, 289 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned 290 * flag is set then the allocated address iova->pfn_lo will be naturally 291 * aligned on roundup_power_of_two(size). 292 */ 293 struct iova * 294 alloc_iova(struct iova_domain *iovad, unsigned long size, 295 unsigned long limit_pfn, 296 bool size_aligned) 297 { 298 struct iova *new_iova; 299 int ret; 300 301 new_iova = alloc_iova_mem(); 302 if (!new_iova) 303 return NULL; 304 305 ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, 306 new_iova, size_aligned); 307 308 if (ret) { 309 free_iova_mem(new_iova); 310 return NULL; 311 } 312 313 return new_iova; 314 } 315 EXPORT_SYMBOL_GPL(alloc_iova); 316 317 static struct iova * 318 private_find_iova(struct iova_domain *iovad, unsigned long pfn) 319 { 320 struct rb_node *node = iovad->rbroot.rb_node; 321 322 assert_spin_locked(&iovad->iova_rbtree_lock); 323 324 while (node) { 325 struct iova *iova = rb_entry(node, struct iova, node); 326 327 if (pfn < iova->pfn_lo) 328 node = node->rb_left; 329 else if (pfn > iova->pfn_hi) 330 node = node->rb_right; 331 else 332 return iova; /* pfn falls within iova's range */ 333 } 334 335 return NULL; 336 } 337 338 static void private_free_iova(struct iova_domain *iovad, struct iova *iova) 339 { 340 assert_spin_locked(&iovad->iova_rbtree_lock); 341 __cached_rbnode_delete_update(iovad, iova); 342 rb_erase(&iova->node, &iovad->rbroot); 343 free_iova_mem(iova); 344 } 345 346 /** 347 * find_iova - finds an iova for a given pfn 348 * @iovad: - iova domain in question. 349 * @pfn: - page frame number 350 * This function finds and returns an iova belonging to the 351 * given doamin which matches the given pfn. 352 */ 353 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) 354 { 355 unsigned long flags; 356 struct iova *iova; 357 358 /* Take the lock so that no other thread is manipulating the rbtree */ 359 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 360 iova = private_find_iova(iovad, pfn); 361 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 362 return iova; 363 } 364 EXPORT_SYMBOL_GPL(find_iova); 365 366 /** 367 * __free_iova - frees the given iova 368 * @iovad: iova domain in question. 369 * @iova: iova in question. 370 * Frees the given iova belonging to the giving domain 371 */ 372 void 373 __free_iova(struct iova_domain *iovad, struct iova *iova) 374 { 375 unsigned long flags; 376 377 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 378 private_free_iova(iovad, iova); 379 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 380 } 381 EXPORT_SYMBOL_GPL(__free_iova); 382 383 /** 384 * free_iova - finds and frees the iova for a given pfn 385 * @iovad: - iova domain in question. 386 * @pfn: - pfn that is allocated previously 387 * This functions finds an iova for a given pfn and then 388 * frees the iova from that domain. 389 */ 390 void 391 free_iova(struct iova_domain *iovad, unsigned long pfn) 392 { 393 struct iova *iova = find_iova(iovad, pfn); 394 395 if (iova) 396 __free_iova(iovad, iova); 397 398 } 399 EXPORT_SYMBOL_GPL(free_iova); 400 401 /** 402 * alloc_iova_fast - allocates an iova from rcache 403 * @iovad: - iova domain in question 404 * @size: - size of page frames to allocate 405 * @limit_pfn: - max limit address 406 * @flush_rcache: - set to flush rcache on regular allocation failure 407 * This function tries to satisfy an iova allocation from the rcache, 408 * and falls back to regular allocation on failure. If regular allocation 409 * fails too and the flush_rcache flag is set then the rcache will be flushed. 410 */ 411 unsigned long 412 alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 413 unsigned long limit_pfn, bool flush_rcache) 414 { 415 unsigned long iova_pfn; 416 struct iova *new_iova; 417 418 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); 419 if (iova_pfn) 420 return iova_pfn; 421 422 retry: 423 new_iova = alloc_iova(iovad, size, limit_pfn, true); 424 if (!new_iova) { 425 unsigned int cpu; 426 427 if (!flush_rcache) 428 return 0; 429 430 /* Try replenishing IOVAs by flushing rcache. */ 431 flush_rcache = false; 432 for_each_online_cpu(cpu) 433 free_cpu_cached_iovas(cpu, iovad); 434 goto retry; 435 } 436 437 return new_iova->pfn_lo; 438 } 439 EXPORT_SYMBOL_GPL(alloc_iova_fast); 440 441 /** 442 * free_iova_fast - free iova pfn range into rcache 443 * @iovad: - iova domain in question. 444 * @pfn: - pfn that is allocated previously 445 * @size: - # of pages in range 446 * This functions frees an iova range by trying to put it into the rcache, 447 * falling back to regular iova deallocation via free_iova() if this fails. 448 */ 449 void 450 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) 451 { 452 if (iova_rcache_insert(iovad, pfn, size)) 453 return; 454 455 free_iova(iovad, pfn); 456 } 457 EXPORT_SYMBOL_GPL(free_iova_fast); 458 459 #define fq_ring_for_each(i, fq) \ 460 for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) 461 462 static inline bool fq_full(struct iova_fq *fq) 463 { 464 assert_spin_locked(&fq->lock); 465 return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); 466 } 467 468 static inline unsigned fq_ring_add(struct iova_fq *fq) 469 { 470 unsigned idx = fq->tail; 471 472 assert_spin_locked(&fq->lock); 473 474 fq->tail = (idx + 1) % IOVA_FQ_SIZE; 475 476 return idx; 477 } 478 479 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) 480 { 481 u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); 482 unsigned idx; 483 484 assert_spin_locked(&fq->lock); 485 486 fq_ring_for_each(idx, fq) { 487 488 if (fq->entries[idx].counter >= counter) 489 break; 490 491 if (iovad->entry_dtor) 492 iovad->entry_dtor(fq->entries[idx].data); 493 494 free_iova_fast(iovad, 495 fq->entries[idx].iova_pfn, 496 fq->entries[idx].pages); 497 498 fq->head = (fq->head + 1) % IOVA_FQ_SIZE; 499 } 500 } 501 502 static void iova_domain_flush(struct iova_domain *iovad) 503 { 504 atomic64_inc(&iovad->fq_flush_start_cnt); 505 iovad->flush_cb(iovad); 506 atomic64_inc(&iovad->fq_flush_finish_cnt); 507 } 508 509 static void fq_destroy_all_entries(struct iova_domain *iovad) 510 { 511 int cpu; 512 513 /* 514 * This code runs when the iova_domain is being detroyed, so don't 515 * bother to free iovas, just call the entry_dtor on all remaining 516 * entries. 517 */ 518 if (!iovad->entry_dtor) 519 return; 520 521 for_each_possible_cpu(cpu) { 522 struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); 523 int idx; 524 525 fq_ring_for_each(idx, fq) 526 iovad->entry_dtor(fq->entries[idx].data); 527 } 528 } 529 530 static void fq_flush_timeout(struct timer_list *t) 531 { 532 struct iova_domain *iovad = from_timer(iovad, t, fq_timer); 533 int cpu; 534 535 atomic_set(&iovad->fq_timer_on, 0); 536 iova_domain_flush(iovad); 537 538 for_each_possible_cpu(cpu) { 539 unsigned long flags; 540 struct iova_fq *fq; 541 542 fq = per_cpu_ptr(iovad->fq, cpu); 543 spin_lock_irqsave(&fq->lock, flags); 544 fq_ring_free(iovad, fq); 545 spin_unlock_irqrestore(&fq->lock, flags); 546 } 547 } 548 549 void queue_iova(struct iova_domain *iovad, 550 unsigned long pfn, unsigned long pages, 551 unsigned long data) 552 { 553 struct iova_fq *fq = raw_cpu_ptr(iovad->fq); 554 unsigned long flags; 555 unsigned idx; 556 557 spin_lock_irqsave(&fq->lock, flags); 558 559 /* 560 * First remove all entries from the flush queue that have already been 561 * flushed out on another CPU. This makes the fq_full() check below less 562 * likely to be true. 563 */ 564 fq_ring_free(iovad, fq); 565 566 if (fq_full(fq)) { 567 iova_domain_flush(iovad); 568 fq_ring_free(iovad, fq); 569 } 570 571 idx = fq_ring_add(fq); 572 573 fq->entries[idx].iova_pfn = pfn; 574 fq->entries[idx].pages = pages; 575 fq->entries[idx].data = data; 576 fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); 577 578 spin_unlock_irqrestore(&fq->lock, flags); 579 580 /* Avoid false sharing as much as possible. */ 581 if (!atomic_read(&iovad->fq_timer_on) && 582 !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1)) 583 mod_timer(&iovad->fq_timer, 584 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 585 } 586 EXPORT_SYMBOL_GPL(queue_iova); 587 588 /** 589 * put_iova_domain - destroys the iova doamin 590 * @iovad: - iova domain in question. 591 * All the iova's in that domain are destroyed. 592 */ 593 void put_iova_domain(struct iova_domain *iovad) 594 { 595 struct iova *iova, *tmp; 596 597 free_iova_flush_queue(iovad); 598 free_iova_rcaches(iovad); 599 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) 600 free_iova_mem(iova); 601 } 602 EXPORT_SYMBOL_GPL(put_iova_domain); 603 604 static int 605 __is_range_overlap(struct rb_node *node, 606 unsigned long pfn_lo, unsigned long pfn_hi) 607 { 608 struct iova *iova = rb_entry(node, struct iova, node); 609 610 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) 611 return 1; 612 return 0; 613 } 614 615 static inline struct iova * 616 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) 617 { 618 struct iova *iova; 619 620 iova = alloc_iova_mem(); 621 if (iova) { 622 iova->pfn_lo = pfn_lo; 623 iova->pfn_hi = pfn_hi; 624 } 625 626 return iova; 627 } 628 629 static struct iova * 630 __insert_new_range(struct iova_domain *iovad, 631 unsigned long pfn_lo, unsigned long pfn_hi) 632 { 633 struct iova *iova; 634 635 iova = alloc_and_init_iova(pfn_lo, pfn_hi); 636 if (iova) 637 iova_insert_rbtree(&iovad->rbroot, iova, NULL); 638 639 return iova; 640 } 641 642 static void 643 __adjust_overlap_range(struct iova *iova, 644 unsigned long *pfn_lo, unsigned long *pfn_hi) 645 { 646 if (*pfn_lo < iova->pfn_lo) 647 iova->pfn_lo = *pfn_lo; 648 if (*pfn_hi > iova->pfn_hi) 649 *pfn_lo = iova->pfn_hi + 1; 650 } 651 652 /** 653 * reserve_iova - reserves an iova in the given range 654 * @iovad: - iova domain pointer 655 * @pfn_lo: - lower page frame address 656 * @pfn_hi:- higher pfn adderss 657 * This function allocates reserves the address range from pfn_lo to pfn_hi so 658 * that this address is not dished out as part of alloc_iova. 659 */ 660 struct iova * 661 reserve_iova(struct iova_domain *iovad, 662 unsigned long pfn_lo, unsigned long pfn_hi) 663 { 664 struct rb_node *node; 665 unsigned long flags; 666 struct iova *iova; 667 unsigned int overlap = 0; 668 669 /* Don't allow nonsensical pfns */ 670 if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) 671 return NULL; 672 673 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 674 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { 675 if (__is_range_overlap(node, pfn_lo, pfn_hi)) { 676 iova = rb_entry(node, struct iova, node); 677 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); 678 if ((pfn_lo >= iova->pfn_lo) && 679 (pfn_hi <= iova->pfn_hi)) 680 goto finish; 681 overlap = 1; 682 683 } else if (overlap) 684 break; 685 } 686 687 /* We are here either because this is the first reserver node 688 * or need to insert remaining non overlap addr range 689 */ 690 iova = __insert_new_range(iovad, pfn_lo, pfn_hi); 691 finish: 692 693 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 694 return iova; 695 } 696 EXPORT_SYMBOL_GPL(reserve_iova); 697 698 /** 699 * copy_reserved_iova - copies the reserved between domains 700 * @from: - source doamin from where to copy 701 * @to: - destination domin where to copy 702 * This function copies reserved iova's from one doamin to 703 * other. 704 */ 705 void 706 copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) 707 { 708 unsigned long flags; 709 struct rb_node *node; 710 711 spin_lock_irqsave(&from->iova_rbtree_lock, flags); 712 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { 713 struct iova *iova = rb_entry(node, struct iova, node); 714 struct iova *new_iova; 715 716 if (iova->pfn_lo == IOVA_ANCHOR) 717 continue; 718 719 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 720 if (!new_iova) 721 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", 722 iova->pfn_lo, iova->pfn_lo); 723 } 724 spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); 725 } 726 EXPORT_SYMBOL_GPL(copy_reserved_iova); 727 728 struct iova * 729 split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, 730 unsigned long pfn_lo, unsigned long pfn_hi) 731 { 732 unsigned long flags; 733 struct iova *prev = NULL, *next = NULL; 734 735 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 736 if (iova->pfn_lo < pfn_lo) { 737 prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); 738 if (prev == NULL) 739 goto error; 740 } 741 if (iova->pfn_hi > pfn_hi) { 742 next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); 743 if (next == NULL) 744 goto error; 745 } 746 747 __cached_rbnode_delete_update(iovad, iova); 748 rb_erase(&iova->node, &iovad->rbroot); 749 750 if (prev) { 751 iova_insert_rbtree(&iovad->rbroot, prev, NULL); 752 iova->pfn_lo = pfn_lo; 753 } 754 if (next) { 755 iova_insert_rbtree(&iovad->rbroot, next, NULL); 756 iova->pfn_hi = pfn_hi; 757 } 758 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 759 760 return iova; 761 762 error: 763 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 764 if (prev) 765 free_iova_mem(prev); 766 return NULL; 767 } 768 769 /* 770 * Magazine caches for IOVA ranges. For an introduction to magazines, 771 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab 772 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams. 773 * For simplicity, we use a static magazine size and don't implement the 774 * dynamic size tuning described in the paper. 775 */ 776 777 #define IOVA_MAG_SIZE 128 778 779 struct iova_magazine { 780 unsigned long size; 781 unsigned long pfns[IOVA_MAG_SIZE]; 782 }; 783 784 struct iova_cpu_rcache { 785 spinlock_t lock; 786 struct iova_magazine *loaded; 787 struct iova_magazine *prev; 788 }; 789 790 static struct iova_magazine *iova_magazine_alloc(gfp_t flags) 791 { 792 return kzalloc(sizeof(struct iova_magazine), flags); 793 } 794 795 static void iova_magazine_free(struct iova_magazine *mag) 796 { 797 kfree(mag); 798 } 799 800 static void 801 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) 802 { 803 unsigned long flags; 804 int i; 805 806 if (!mag) 807 return; 808 809 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 810 811 for (i = 0 ; i < mag->size; ++i) { 812 struct iova *iova = private_find_iova(iovad, mag->pfns[i]); 813 814 BUG_ON(!iova); 815 private_free_iova(iovad, iova); 816 } 817 818 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 819 820 mag->size = 0; 821 } 822 823 static bool iova_magazine_full(struct iova_magazine *mag) 824 { 825 return (mag && mag->size == IOVA_MAG_SIZE); 826 } 827 828 static bool iova_magazine_empty(struct iova_magazine *mag) 829 { 830 return (!mag || mag->size == 0); 831 } 832 833 static unsigned long iova_magazine_pop(struct iova_magazine *mag, 834 unsigned long limit_pfn) 835 { 836 int i; 837 unsigned long pfn; 838 839 BUG_ON(iova_magazine_empty(mag)); 840 841 /* Only fall back to the rbtree if we have no suitable pfns at all */ 842 for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) 843 if (i == 0) 844 return 0; 845 846 /* Swap it to pop it */ 847 pfn = mag->pfns[i]; 848 mag->pfns[i] = mag->pfns[--mag->size]; 849 850 return pfn; 851 } 852 853 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) 854 { 855 BUG_ON(iova_magazine_full(mag)); 856 857 mag->pfns[mag->size++] = pfn; 858 } 859 860 static void init_iova_rcaches(struct iova_domain *iovad) 861 { 862 struct iova_cpu_rcache *cpu_rcache; 863 struct iova_rcache *rcache; 864 unsigned int cpu; 865 int i; 866 867 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 868 rcache = &iovad->rcaches[i]; 869 spin_lock_init(&rcache->lock); 870 rcache->depot_size = 0; 871 rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); 872 if (WARN_ON(!rcache->cpu_rcaches)) 873 continue; 874 for_each_possible_cpu(cpu) { 875 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 876 spin_lock_init(&cpu_rcache->lock); 877 cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); 878 cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); 879 } 880 } 881 } 882 883 /* 884 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and 885 * return true on success. Can fail if rcache is full and we can't free 886 * space, and free_iova() (our only caller) will then return the IOVA 887 * range to the rbtree instead. 888 */ 889 static bool __iova_rcache_insert(struct iova_domain *iovad, 890 struct iova_rcache *rcache, 891 unsigned long iova_pfn) 892 { 893 struct iova_magazine *mag_to_free = NULL; 894 struct iova_cpu_rcache *cpu_rcache; 895 bool can_insert = false; 896 unsigned long flags; 897 898 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 899 spin_lock_irqsave(&cpu_rcache->lock, flags); 900 901 if (!iova_magazine_full(cpu_rcache->loaded)) { 902 can_insert = true; 903 } else if (!iova_magazine_full(cpu_rcache->prev)) { 904 swap(cpu_rcache->prev, cpu_rcache->loaded); 905 can_insert = true; 906 } else { 907 struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC); 908 909 if (new_mag) { 910 spin_lock(&rcache->lock); 911 if (rcache->depot_size < MAX_GLOBAL_MAGS) { 912 rcache->depot[rcache->depot_size++] = 913 cpu_rcache->loaded; 914 } else { 915 mag_to_free = cpu_rcache->loaded; 916 } 917 spin_unlock(&rcache->lock); 918 919 cpu_rcache->loaded = new_mag; 920 can_insert = true; 921 } 922 } 923 924 if (can_insert) 925 iova_magazine_push(cpu_rcache->loaded, iova_pfn); 926 927 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 928 929 if (mag_to_free) { 930 iova_magazine_free_pfns(mag_to_free, iovad); 931 iova_magazine_free(mag_to_free); 932 } 933 934 return can_insert; 935 } 936 937 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, 938 unsigned long size) 939 { 940 unsigned int log_size = order_base_2(size); 941 942 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 943 return false; 944 945 return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn); 946 } 947 948 /* 949 * Caller wants to allocate a new IOVA range from 'rcache'. If we can 950 * satisfy the request, return a matching non-NULL range and remove 951 * it from the 'rcache'. 952 */ 953 static unsigned long __iova_rcache_get(struct iova_rcache *rcache, 954 unsigned long limit_pfn) 955 { 956 struct iova_cpu_rcache *cpu_rcache; 957 unsigned long iova_pfn = 0; 958 bool has_pfn = false; 959 unsigned long flags; 960 961 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 962 spin_lock_irqsave(&cpu_rcache->lock, flags); 963 964 if (!iova_magazine_empty(cpu_rcache->loaded)) { 965 has_pfn = true; 966 } else if (!iova_magazine_empty(cpu_rcache->prev)) { 967 swap(cpu_rcache->prev, cpu_rcache->loaded); 968 has_pfn = true; 969 } else { 970 spin_lock(&rcache->lock); 971 if (rcache->depot_size > 0) { 972 iova_magazine_free(cpu_rcache->loaded); 973 cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; 974 has_pfn = true; 975 } 976 spin_unlock(&rcache->lock); 977 } 978 979 if (has_pfn) 980 iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); 981 982 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 983 984 return iova_pfn; 985 } 986 987 /* 988 * Try to satisfy IOVA allocation range from rcache. Fail if requested 989 * size is too big or the DMA limit we are given isn't satisfied by the 990 * top element in the magazine. 991 */ 992 static unsigned long iova_rcache_get(struct iova_domain *iovad, 993 unsigned long size, 994 unsigned long limit_pfn) 995 { 996 unsigned int log_size = order_base_2(size); 997 998 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 999 return 0; 1000 1001 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); 1002 } 1003 1004 /* 1005 * free rcache data structures. 1006 */ 1007 static void free_iova_rcaches(struct iova_domain *iovad) 1008 { 1009 struct iova_rcache *rcache; 1010 struct iova_cpu_rcache *cpu_rcache; 1011 unsigned int cpu; 1012 int i, j; 1013 1014 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1015 rcache = &iovad->rcaches[i]; 1016 for_each_possible_cpu(cpu) { 1017 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1018 iova_magazine_free(cpu_rcache->loaded); 1019 iova_magazine_free(cpu_rcache->prev); 1020 } 1021 free_percpu(rcache->cpu_rcaches); 1022 for (j = 0; j < rcache->depot_size; ++j) 1023 iova_magazine_free(rcache->depot[j]); 1024 } 1025 } 1026 1027 /* 1028 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) 1029 */ 1030 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) 1031 { 1032 struct iova_cpu_rcache *cpu_rcache; 1033 struct iova_rcache *rcache; 1034 unsigned long flags; 1035 int i; 1036 1037 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1038 rcache = &iovad->rcaches[i]; 1039 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1040 spin_lock_irqsave(&cpu_rcache->lock, flags); 1041 iova_magazine_free_pfns(cpu_rcache->loaded, iovad); 1042 iova_magazine_free_pfns(cpu_rcache->prev, iovad); 1043 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 1044 } 1045 } 1046 1047 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>"); 1048 MODULE_LICENSE("GPL"); 1049