// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christian König
 */

/* Pooling of allocated pages is necessary because changing the caching
 * attributes of the linear mapping on x86 requires a costly cross-CPU TLB
 * invalidate for those addresses.
 *
 * In addition to that, allocations from the DMA coherent API are pooled as
 * well because they are rather slow compared to alloc_pages+map.
 */

#include <linux/module.h>
#include <linux/dma-mapping.h>

#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif

#include <drm/ttm/ttm_pool.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_tt.h>

/**
 * struct ttm_pool_dma - Helper object for coherent DMA mappings
 *
 * @addr: original DMA address returned for the mapping
 * @vaddr: original vaddr returned for the mapping and order in the lower bits
 */
struct ttm_pool_dma {
	dma_addr_t addr;
	unsigned long vaddr;
};
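
/*
 * Note on the @vaddr encoding above (illustrative sketch, not used directly by
 * the code below): dma_alloc_attrs() returns a PAGE_SIZE aligned address, so
 * the lower PAGE_SHIFT bits of the kernel virtual address are always zero and
 * can carry the allocation order instead.  Packing and unpacking then looks
 * roughly like this:
 *
 *	dma->vaddr = (unsigned long)vaddr | order;
 *	vaddr = (void *)(dma->vaddr & PAGE_MASK);
 *	order = dma->vaddr & ~PAGE_MASK;
 */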

static unsigned long page_pool_size;

MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
module_param(page_pool_size, ulong, 0644);

static atomic_long_t allocated_pages;

static struct ttm_pool_type global_write_combined[MAX_ORDER];
static struct ttm_pool_type global_uncached[MAX_ORDER];

static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];

static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
static struct shrinker mm_shrinker;

/* Allocate pages of size 1 << order with the given gfp_flags */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
					unsigned int order)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	struct page *p;
	void *vaddr;

	if (order) {
		gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
			__GFP_KSWAPD_RECLAIM;
		gfp_flags &= ~__GFP_MOVABLE;
		gfp_flags &= ~__GFP_COMP;
	}

	if (!pool->use_dma_alloc) {
		p = alloc_pages(gfp_flags, order);
		if (p)
			p->private = order;
		return p;
	}

	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma)
		return NULL;

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
				&dma->addr, gfp_flags, attr);
	if (!vaddr)
		goto error_free;

	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
	 * TTM page fault handling and extend the DMA API to clean this up.
	 */
	if (is_vmalloc_addr(vaddr))
		p = vmalloc_to_page(vaddr);
	else
		p = virt_to_page(vaddr);

	dma->vaddr = (unsigned long)vaddr | order;
	p->private = (unsigned long)dma;
	return p;

error_free:
	kfree(dma);
	return NULL;
}

/* Reset the caching and pages of size 1 << order */
static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
			       unsigned int order, struct page *p)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	void *vaddr;

#ifdef CONFIG_X86
	/* We don't care that set_pages_wb is inefficient here. This is only
	 * used when we have to shrink and CPU overhead is irrelevant then.
	 */
	if (caching != ttm_cached && !PageHighMem(p))
		set_pages_wb(p, 1 << order);
#endif

	if (!pool || !pool->use_dma_alloc) {
		__free_pages(p, order);
		return;
	}

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	dma = (void *)p->private;
	vaddr = (void *)(dma->vaddr & PAGE_MASK);
	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
		       attr);
	kfree(dma);
}

/* Apply a new caching to an array of pages */
static int ttm_pool_apply_caching(struct page **first, struct page **last,
				  enum ttm_caching caching)
{
#ifdef CONFIG_X86
	unsigned int num_pages = last - first;

	if (!num_pages)
		return 0;

	switch (caching) {
	case ttm_cached:
		break;
	case ttm_write_combined:
		return set_pages_array_wc(first, num_pages);
	case ttm_uncached:
		return set_pages_array_uc(first, num_pages);
	}
#endif
	return 0;
}

/* Map pages of 1 << order size and fill the DMA address array */
static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
			struct page *p, dma_addr_t **dma_addr)
{
	dma_addr_t addr;
	unsigned int i;

	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		addr = dma->addr;
	} else {
		size_t size = (1ULL << order) * PAGE_SIZE;

		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(pool->dev, addr))
			return -EFAULT;
	}

	for (i = 1 << order; i ; --i) {
		*(*dma_addr)++ = addr;
		addr += PAGE_SIZE;
	}

	return 0;
}
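
/*
 * Worked example for ttm_pool_map() above (illustrative only): for order = 2
 * the allocation covers four pages, so the caller's array receives the four
 * consecutive addresses addr, addr + PAGE_SIZE, addr + 2 * PAGE_SIZE and
 * addr + 3 * PAGE_SIZE, and *dma_addr is advanced past them so the next
 * mapping continues where this one stopped.
 */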

/* Unmap pages of 1 << order size */
static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
			   unsigned int num_pages)
{
	/* Unmapped while freeing the page */
	if (pool->use_dma_alloc)
		return;

	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
		       DMA_BIDIRECTIONAL);
}

/* Give pages into a specific pool_type */
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
{
	spin_lock(&pt->lock);
	list_add(&p->lru, &pt->pages);
	spin_unlock(&pt->lock);
	atomic_long_add(1 << pt->order, &allocated_pages);
}

/* Take pages from a specific pool_type, return NULL when nothing available */
static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
{
	struct page *p;

	spin_lock(&pt->lock);
	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
	if (p) {
		atomic_long_sub(1 << pt->order, &allocated_pages);
		list_del(&p->lru);
	}
	spin_unlock(&pt->lock);

	return p;
}

/* Initialize and add a pool type to the global shrinker list */
static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
			       enum ttm_caching caching, unsigned int order)
{
	pt->pool = pool;
	pt->caching = caching;
	pt->order = order;
	spin_lock_init(&pt->lock);
	INIT_LIST_HEAD(&pt->pages);

	spin_lock(&shrinker_lock);
	list_add_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);
}

/* Remove a pool_type from the global shrinker list and free all pages */
static void ttm_pool_type_fini(struct ttm_pool_type *pt)
{
	struct page *p, *tmp;

	spin_lock(&shrinker_lock);
	list_del(&pt->shrinker_list);
	spin_unlock(&shrinker_lock);

	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
}

/* Return the pool_type to use for the given caching and order */
static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
						  enum ttm_caching caching,
						  unsigned int order)
{
	if (pool->use_dma_alloc)
		return &pool->caching[caching].orders[order];

#ifdef CONFIG_X86
	switch (caching) {
	case ttm_write_combined:
		if (pool->use_dma32)
			return &global_dma32_write_combined[order];

		return &global_write_combined[order];
	case ttm_uncached:
		if (pool->use_dma32)
			return &global_dma32_uncached[order];

		return &global_uncached[order];
	default:
		break;
	}
#endif

	return NULL;
}

/* Free pages using the global shrinker list */
static unsigned int ttm_pool_shrink(void)
{
	struct ttm_pool_type *pt;
	unsigned int num_freed;
	struct page *p;

	spin_lock(&shrinker_lock);
	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);

	p = ttm_pool_type_take(pt);
	if (p) {
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
		num_freed = 1 << pt->order;
	} else {
		num_freed = 0;
	}

	list_move_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);

	return num_freed;
}

/* Return the allocation order of a page */
static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
{
	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		return dma->vaddr & ~PAGE_MASK;
	}

	return p->private;
}

/**
 * ttm_pool_alloc - Fill a ttm_tt object
 *
 * @pool: ttm_pool to use
 * @tt: ttm_tt object to fill
 * @ctx: operation context
 *
 * Fill the ttm_tt object with pages and also make sure to DMA map them when
 * necessary.
 *
 * Returns: 0 on success, negative error code otherwise.
 */
int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
		   struct ttm_operation_ctx *ctx)
{
	unsigned long num_pages = tt->num_pages;
	dma_addr_t *dma_addr = tt->dma_address;
	struct page **caching = tt->pages;
	struct page **pages = tt->pages;
	gfp_t gfp_flags = GFP_USER;
	unsigned int i, order;
	struct page *p;
	int r;

	WARN_ON(!num_pages || ttm_tt_is_populated(tt));
	WARN_ON(dma_addr && !pool->dev);

	if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
		gfp_flags |= __GFP_ZERO;

	if (ctx->gfp_retry_mayfail)
		gfp_flags |= __GFP_RETRY_MAYFAIL;

	if (pool->use_dma32)
		gfp_flags |= GFP_DMA32;
	else
		gfp_flags |= GFP_HIGHUSER;

	for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
	     order = min_t(unsigned int, order, __fls(num_pages))) {
		bool apply_caching = false;
		struct ttm_pool_type *pt;

		pt = ttm_pool_select_type(pool, tt->caching, order);
		p = pt ? ttm_pool_type_take(pt) : NULL;
		if (p) {
			apply_caching = true;
		} else {
			p = ttm_pool_alloc_page(pool, gfp_flags, order);
			if (p && PageHighMem(p))
				apply_caching = true;
		}

		if (!p) {
			if (order) {
				--order;
				continue;
			}
			r = -ENOMEM;
			goto error_free_all;
		}

		if (apply_caching) {
			r = ttm_pool_apply_caching(caching, pages,
						   tt->caching);
			if (r)
				goto error_free_page;
			caching = pages + (1 << order);
		}

		r = ttm_mem_global_alloc_page(&ttm_mem_glob, p,
					      (1 << order) * PAGE_SIZE,
					      ctx);
		if (r)
			goto error_free_page;

		if (dma_addr) {
			r = ttm_pool_map(pool, order, p, &dma_addr);
			if (r)
				goto error_global_free;
		}

		num_pages -= 1 << order;
		for (i = 1 << order; i; --i)
			*(pages++) = p++;
	}

	r = ttm_pool_apply_caching(caching, pages, tt->caching);
	if (r)
		goto error_free_all;

	return 0;

error_global_free:
	ttm_mem_global_free_page(&ttm_mem_glob, p, (1 << order) * PAGE_SIZE);

error_free_page:
	ttm_pool_free_page(pool, tt->caching, order, p);

error_free_all:
	num_pages = tt->num_pages - num_pages;
	for (i = 0; i < num_pages; ) {
		order = ttm_pool_page_order(pool, tt->pages[i]);
		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
		i += 1 << order;
	}

	return r;
}
EXPORT_SYMBOL(ttm_pool_alloc);
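
/*
 * Usage sketch (illustrative only, not part of this file): a driver typically
 * calls ttm_pool_alloc() from its ttm_tt populate callback and ttm_pool_free()
 * from the matching unpopulate path.  my_device, my_dev->pool and the function
 * names below are hypothetical.
 *
 *	static int my_device_tt_populate(struct my_device *my_dev,
 *					 struct ttm_tt *tt,
 *					 struct ttm_operation_ctx *ctx)
 *	{
 *		return ttm_pool_alloc(&my_dev->pool, tt, ctx);
 *	}
 *
 *	static void my_device_tt_unpopulate(struct my_device *my_dev,
 *					    struct ttm_tt *tt)
 *	{
 *		ttm_pool_free(&my_dev->pool, tt);
 *	}
 */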

/**
 * ttm_pool_free - Free the backing pages from a ttm_tt object
 *
 * @pool: Pool to give pages back to.
 * @tt: ttm_tt object to unpopulate
 *
 * Give the backing pages back to a pool or free them
 */
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
	unsigned int i;

	for (i = 0; i < tt->num_pages; ) {
		struct page *p = tt->pages[i];
		unsigned int order, num_pages;
		struct ttm_pool_type *pt;

		order = ttm_pool_page_order(pool, p);
		num_pages = 1ULL << order;
		ttm_mem_global_free_page(&ttm_mem_glob, p,
					 num_pages * PAGE_SIZE);
		if (tt->dma_address)
			ttm_pool_unmap(pool, tt->dma_address[i], num_pages);

		pt = ttm_pool_select_type(pool, tt->caching, order);
		if (pt)
			ttm_pool_type_give(pt, tt->pages[i]);
		else
			ttm_pool_free_page(pool, tt->caching, order,
					   tt->pages[i]);

		i += num_pages;
	}

	while (atomic_long_read(&allocated_pages) > page_pool_size)
		ttm_pool_shrink();
}
EXPORT_SYMBOL(ttm_pool_free);

/**
 * ttm_pool_init - Initialize a pool
 *
 * @pool: the pool to initialize
 * @dev: device for DMA allocations and mappings
 * @use_dma_alloc: true if coherent DMA alloc should be used
 * @use_dma32: true if GFP_DMA32 should be used
 *
 * Initialize the pool and its pool types.
 */
void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
		   bool use_dma_alloc, bool use_dma32)
{
	unsigned int i, j;

	WARN_ON(!dev && use_dma_alloc);

	pool->dev = dev;
	pool->use_dma_alloc = use_dma_alloc;
	pool->use_dma32 = use_dma32;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
		for (j = 0; j < MAX_ORDER; ++j)
			ttm_pool_type_init(&pool->caching[i].orders[j],
					   pool, i, j);
}
EXPORT_SYMBOL(ttm_pool_init);

/**
 * ttm_pool_fini - Cleanup a pool
 *
 * @pool: the pool to clean up
 *
 * Free all pages in the pool and unregister the types from the global
 * shrinker.
 */
void ttm_pool_fini(struct ttm_pool *pool)
{
	unsigned int i, j;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
		for (j = 0; j < MAX_ORDER; ++j)
			ttm_pool_type_fini(&pool->caching[i].orders[j]);
}
EXPORT_SYMBOL(ttm_pool_fini);
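
/*
 * Lifecycle sketch (illustrative only): a pool is initialized once per device
 * and torn down again before the device goes away; the device pointer is
 * required whenever use_dma_alloc is set.  my_dev and my_dev->pool are
 * hypothetical names.
 *
 *	ttm_pool_init(&my_dev->pool, my_dev->dev, true, false);
 *	...
 *	ttm_pool_fini(&my_dev->pool);
 */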

#ifdef CONFIG_DEBUG_FS
/* Count the number of pages available in a pool_type */
static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
{
	unsigned int count = 0;
	struct page *p;

	spin_lock(&pt->lock);
	/* Only used for debugfs, the overhead doesn't matter */
	list_for_each_entry(p, &pt->pages, lru)
		++count;
	spin_unlock(&pt->lock);

	return count;
}

/* Dump information about the different pool types */
static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
				    struct seq_file *m)
{
	unsigned int i;

	for (i = 0; i < MAX_ORDER; ++i)
		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
	seq_puts(m, "\n");
}

/**
 * ttm_pool_debugfs - Debugfs dump function for a pool
 *
 * @pool: the pool to dump the information for
 * @m: seq_file to dump to
 *
 * Make a debugfs dump with the per pool and global information.
 */
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
{
	unsigned int i;

	spin_lock(&shrinker_lock);

	seq_puts(m, "\t ");
	for (i = 0; i < MAX_ORDER; ++i)
		seq_printf(m, " ---%2u---", i);
	seq_puts(m, "\n");

	seq_puts(m, "wc\t:");
	ttm_pool_debugfs_orders(global_write_combined, m);
	seq_puts(m, "uc\t:");
	ttm_pool_debugfs_orders(global_uncached, m);

	seq_puts(m, "wc 32\t:");
	ttm_pool_debugfs_orders(global_dma32_write_combined, m);
	seq_puts(m, "uc 32\t:");
	ttm_pool_debugfs_orders(global_dma32_uncached, m);

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
		seq_puts(m, "DMA ");
		switch (i) {
		case ttm_cached:
			seq_puts(m, "\t:");
			break;
		case ttm_write_combined:
			seq_puts(m, "wc\t:");
			break;
		case ttm_uncached:
			seq_puts(m, "uc\t:");
			break;
		}
		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
	}

	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
		   atomic_long_read(&allocated_pages), page_pool_size);

	spin_unlock(&shrinker_lock);

	return 0;
}
EXPORT_SYMBOL(ttm_pool_debugfs);

#endif

/* As long as pages are available make sure to release at least one */
static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	unsigned long num_freed = 0;

	do
		num_freed += ttm_pool_shrink();
	while (!num_freed && atomic_long_read(&allocated_pages));

	return num_freed;
}

/* Return the number of pages available or SHRINK_EMPTY if we have none */
static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	unsigned long num_pages = atomic_long_read(&allocated_pages);

	return num_pages ? num_pages : SHRINK_EMPTY;
}

/**
 * ttm_pool_mgr_init - Initialize globals
 *
 * @num_pages: default number of pages
 *
 * Initialize the global locks and lists for the MM shrinker.
 */
int ttm_pool_mgr_init(unsigned long num_pages)
{
	unsigned int i;

	if (!page_pool_size)
		page_pool_size = num_pages;

	spin_lock_init(&shrinker_lock);
	INIT_LIST_HEAD(&shrinker_list);

	for (i = 0; i < MAX_ORDER; ++i) {
		ttm_pool_type_init(&global_write_combined[i], NULL,
				   ttm_write_combined, i);
		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);

		ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
				   ttm_write_combined, i);
		ttm_pool_type_init(&global_dma32_uncached[i], NULL,
				   ttm_uncached, i);
	}

	mm_shrinker.count_objects = ttm_pool_shrinker_count;
	mm_shrinker.scan_objects = ttm_pool_shrinker_scan;
	mm_shrinker.seeks = 1;
	return register_shrinker(&mm_shrinker);
}

/**
 * ttm_pool_mgr_fini - Finalize globals
 *
 * Cleanup the global pools and unregister the MM shrinker.
 */
void ttm_pool_mgr_fini(void)
{
	unsigned int i;

	for (i = 0; i < MAX_ORDER; ++i) {
		ttm_pool_type_fini(&global_write_combined[i]);
		ttm_pool_type_fini(&global_uncached[i]);

		ttm_pool_type_fini(&global_dma32_write_combined[i]);
		ttm_pool_type_fini(&global_dma32_uncached[i]);
	}

	unregister_shrinker(&mm_shrinker);
	WARN_ON(!list_empty(&shrinker_list));
}
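
/*
 * Global setup sketch (illustrative only): ttm_pool_mgr_init() is expected to
 * be called once when TTM itself comes up, with a default upper bound on
 * pooled pages, and ttm_pool_mgr_fini() once on teardown.  If the
 * page_pool_size module parameter is not set, num_pages becomes the limit that
 * the shrink loop in ttm_pool_free() enforces.  default_num_pages below is a
 * hypothetical value chosen by the caller.
 *
 *	ret = ttm_pool_mgr_init(default_num_pages);
 *	...
 *	ttm_pool_mgr_fini();
 */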