// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Christian König
 */

/* Pooling of allocated pages is necessary because changing the caching
 * attributes of the linear mapping on x86 requires a costly cross-CPU TLB
 * invalidate for those addresses.
 *
 * In addition to that, allocations from the DMA coherent API are pooled as
 * well because they are rather slow compared to alloc_pages+map.
 */

#include <linux/module.h>
#include <linux/dma-mapping.h>

#ifdef CONFIG_X86
#include <asm/set_memory.h>
#endif

#include <drm/ttm/ttm_pool.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_tt.h>

/**
 * struct ttm_pool_dma - Helper object for coherent DMA mappings
 *
 * @addr: original DMA address returned for the mapping
 * @vaddr: original vaddr returned for the mapping, with the allocation
 *	order stored in the lower bits
 */
struct ttm_pool_dma {
	dma_addr_t addr;
	unsigned long vaddr;
};

static unsigned long page_pool_size;

MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool");
module_param(page_pool_size, ulong, 0644);

static atomic_long_t allocated_pages;

static struct ttm_pool_type global_write_combined[MAX_ORDER];
static struct ttm_pool_type global_uncached[MAX_ORDER];

static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
static struct shrinker mm_shrinker;

/* Allocate pages of size 1 << order with the given gfp_flags */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
					unsigned int order)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	struct page *p;
	void *vaddr;

	/* For higher orders try a light-weight huge page allocation, but
	 * don't use movable or compound pages since TTM pins the pages and
	 * handles them individually.
	 */
	if (order) {
		gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY |
			__GFP_KSWAPD_RECLAIM;
		gfp_flags &= ~__GFP_MOVABLE;
		gfp_flags &= ~__GFP_COMP;
	}

	if (!pool->use_dma_alloc) {
		p = alloc_pages(gfp_flags, order);
		if (p)
			p->private = order;
		return p;
	}

	dma = kmalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma)
		return NULL;

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE,
				&dma->addr, gfp_flags, attr);
	if (!vaddr)
		goto error_free;

	/* TODO: This is an illegal abuse of the DMA API, but we need to rework
	 * TTM page fault handling and extend the DMA API to clean this up.
	 */
	if (is_vmalloc_addr(vaddr))
		p = vmalloc_to_page(vaddr);
	else
		p = virt_to_page(vaddr);

	dma->vaddr = (unsigned long)vaddr | order;
	p->private = (unsigned long)dma;
	return p;

error_free:
	kfree(dma);
	return NULL;
}
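
/* A short worked example of the vaddr/order packing above, assuming 4 KiB
 * pages: dma_alloc_attrs() returns a page aligned vaddr, so the lower
 * PAGE_SHIFT bits are always zero and are free to hold the allocation order.
 * For an (illustrative) vaddr of 0xffff888012340000 and order 3 we store
 * 0xffff888012340003; the vaddr is recovered with "& PAGE_MASK" in
 * ttm_pool_free_page() and the order with "& ~PAGE_MASK" in
 * ttm_pool_page_order().
 */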

/* Reset the caching and free pages of size 1 << order */
static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching,
			       unsigned int order, struct page *p)
{
	unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS;
	struct ttm_pool_dma *dma;
	void *vaddr;

#ifdef CONFIG_X86
	/* We don't care that set_pages_wb is inefficient here. This is only
	 * used when we have to shrink and CPU overhead is irrelevant then.
	 */
	if (caching != ttm_cached && !PageHighMem(p))
		set_pages_wb(p, 1 << order);
#endif

	if (!pool || !pool->use_dma_alloc) {
		__free_pages(p, order);
		return;
	}

	if (order)
		attr |= DMA_ATTR_NO_WARN;

	dma = (void *)p->private;
	vaddr = (void *)(dma->vaddr & PAGE_MASK);
	dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr,
		       attr);
	kfree(dma);
}

/* Apply a new caching to an array of pages */
static int ttm_pool_apply_caching(struct page **first, struct page **last,
				  enum ttm_caching caching)
{
#ifdef CONFIG_X86
	unsigned int num_pages = last - first;

	if (!num_pages)
		return 0;

	switch (caching) {
	case ttm_cached:
		break;
	case ttm_write_combined:
		return set_pages_array_wc(first, num_pages);
	case ttm_uncached:
		return set_pages_array_uc(first, num_pages);
	}
#endif
	return 0;
}

/* Map pages of 1 << order size and fill the DMA address array */
static int ttm_pool_map(struct ttm_pool *pool, unsigned int order,
			struct page *p, dma_addr_t **dma_addr)
{
	dma_addr_t addr;
	unsigned int i;

	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		addr = dma->addr;
	} else {
		size_t size = (1ULL << order) * PAGE_SIZE;

		addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(pool->dev, addr))
			return -EFAULT;
	}

	for (i = 1 << order; i; --i) {
		*(*dma_addr)++ = addr;
		addr += PAGE_SIZE;
	}

	return 0;
}
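
/* Example of the DMA address fan-out done by ttm_pool_map() above, assuming
 * 4 KiB pages: a single order 2 allocation mapped at DMA address A fills four
 * consecutive array entries with A, A + 0x1000, A + 0x2000 and A + 0x3000, so
 * callers can keep treating tt->dma_address as a flat per-page array.
 */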

/* Unmap pages of 1 << order size */
static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
			   unsigned int num_pages)
{
	/* Unmapped while freeing the page */
	if (pool->use_dma_alloc)
		return;

	dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT,
		       DMA_BIDIRECTIONAL);
}

/* Give pages into a specific pool_type */
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
{
	spin_lock(&pt->lock);
	list_add(&p->lru, &pt->pages);
	spin_unlock(&pt->lock);
	atomic_long_add(1 << pt->order, &allocated_pages);
}

/* Take pages from a specific pool_type, return NULL when nothing available */
static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
{
	struct page *p;

	spin_lock(&pt->lock);
	p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
	if (p) {
		atomic_long_sub(1 << pt->order, &allocated_pages);
		list_del(&p->lru);
	}
	spin_unlock(&pt->lock);

	return p;
}

/* Count the number of pages available in a pool_type */
static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt)
{
	unsigned int count = 0;
	struct page *p;

	spin_lock(&pt->lock);
	/* Only used for debugfs, the overhead doesn't matter */
	list_for_each_entry(p, &pt->pages, lru)
		++count;
	spin_unlock(&pt->lock);

	return count;
}

/* Initialize and add a pool type to the global shrinker list */
static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
			       enum ttm_caching caching, unsigned int order)
{
	pt->pool = pool;
	pt->caching = caching;
	pt->order = order;
	spin_lock_init(&pt->lock);
	INIT_LIST_HEAD(&pt->pages);

	spin_lock(&shrinker_lock);
	list_add_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);
}

/* Remove a pool_type from the global shrinker list and free all pages */
static void ttm_pool_type_fini(struct ttm_pool_type *pt)
{
	struct page *p, *tmp;

	spin_lock(&shrinker_lock);
	list_del(&pt->shrinker_list);
	spin_unlock(&shrinker_lock);

	list_for_each_entry_safe(p, tmp, &pt->pages, lru)
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
}

/* Return the pool_type to use for the given caching and order */
static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
						  enum ttm_caching caching,
						  unsigned int order)
{
	if (pool->use_dma_alloc)
		return &pool->caching[caching].orders[order];

#ifdef CONFIG_X86
	switch (caching) {
	case ttm_write_combined:
		return &global_write_combined[order];
	case ttm_uncached:
		return &global_uncached[order];
	default:
		break;
	}
#endif

	return NULL;
}

/* Free pages using the global shrinker list */
static unsigned int ttm_pool_shrink(void)
{
	struct ttm_pool_type *pt;
	unsigned int num_freed;
	struct page *p;

	spin_lock(&shrinker_lock);
	pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);

	p = ttm_pool_type_take(pt);
	if (p) {
		ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
		num_freed = 1 << pt->order;
	} else {
		num_freed = 0;
	}

	list_move_tail(&pt->shrinker_list, &shrinker_list);
	spin_unlock(&shrinker_lock);

	return num_freed;
}
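
/* Note on the shrinking order: ttm_pool_shrink() always drains the pool_type
 * at the head of shrinker_list and then moves it to the tail, so repeated
 * calls free pages round-robin across all registered pools instead of
 * emptying one pool completely before touching the next.
 */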

/* Return the allocation order of a page */
static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
{
	if (pool->use_dma_alloc) {
		struct ttm_pool_dma *dma = (void *)p->private;

		return dma->vaddr & ~PAGE_MASK;
	}

	return p->private;
}

/**
 * ttm_pool_alloc - Fill a ttm_tt object
 *
 * @pool: ttm_pool to use
 * @tt: ttm_tt object to fill
 * @ctx: operation context
 *
 * Fill the ttm_tt object with pages and also make sure to DMA map them when
 * necessary.
 *
 * Returns: 0 on success, negative error code otherwise.
 */
int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
		   struct ttm_operation_ctx *ctx)
{
	unsigned long num_pages = tt->num_pages;
	dma_addr_t *dma_addr = tt->dma_address;
	struct page **caching = tt->pages;
	struct page **pages = tt->pages;
	gfp_t gfp_flags = GFP_USER;
	unsigned int i, order;
	struct page *p;
	int r;

	WARN_ON(!num_pages || ttm_tt_is_populated(tt));
	WARN_ON(dma_addr && !pool->dev);

	if (tt->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
		gfp_flags |= __GFP_ZERO;

	if (ctx->gfp_retry_mayfail)
		gfp_flags |= __GFP_RETRY_MAYFAIL;

	if (pool->use_dma32)
		gfp_flags |= GFP_DMA32;
	else
		gfp_flags |= GFP_HIGHUSER;

	/* Allocate in chunks, starting with the largest useful order and
	 * falling back to smaller orders when an allocation fails.
	 */
	for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
	     order = min_t(unsigned int, order, __fls(num_pages))) {
		bool apply_caching = false;
		struct ttm_pool_type *pt;

		pt = ttm_pool_select_type(pool, tt->caching, order);
		p = pt ? ttm_pool_type_take(pt) : NULL;
		if (p) {
			apply_caching = true;
		} else {
			p = ttm_pool_alloc_page(pool, gfp_flags, order);
			if (p && PageHighMem(p))
				apply_caching = true;
		}

		if (!p) {
			if (order) {
				--order;
				continue;
			}
			r = -ENOMEM;
			goto error_free_all;
		}

		if (apply_caching) {
			r = ttm_pool_apply_caching(caching, pages,
						   tt->caching);
			if (r)
				goto error_free_page;
			caching = pages + (1 << order);
		}

		r = ttm_mem_global_alloc_page(&ttm_mem_glob, p,
					      (1 << order) * PAGE_SIZE,
					      ctx);
		if (r)
			goto error_free_page;

		if (dma_addr) {
			r = ttm_pool_map(pool, order, p, &dma_addr);
			if (r)
				goto error_global_free;
		}

		num_pages -= 1 << order;
		for (i = 1 << order; i; --i)
			*(pages++) = p++;
	}

	r = ttm_pool_apply_caching(caching, pages, tt->caching);
	if (r)
		goto error_free_all;

	return 0;

error_global_free:
	ttm_mem_global_free_page(&ttm_mem_glob, p, (1 << order) * PAGE_SIZE);

error_free_page:
	ttm_pool_free_page(pool, tt->caching, order, p);

error_free_all:
	num_pages = tt->num_pages - num_pages;
	for (i = 0; i < num_pages; ) {
		order = ttm_pool_page_order(pool, tt->pages[i]);
		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
		i += 1 << order;
	}

	return r;
}
EXPORT_SYMBOL(ttm_pool_alloc);
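
/* A rough worked example of the allocation loop above, assuming MAX_ORDER is
 * 11 (the common default), so the largest chunk is 1024 pages: a request for
 * 1030 pages is served as one order 10 block (1024 pages), then one order 2
 * block (4 pages) and finally one order 1 block (2 pages), with the order
 * dropping further whenever a higher order allocation fails.
 */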

/**
 * ttm_pool_free - Free the backing pages from a ttm_tt object
 *
 * @pool: Pool to give pages back to.
 * @tt: ttm_tt object to unpopulate
 *
 * Give the backing pages back to a pool or free them
 */
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
	unsigned int i;

	for (i = 0; i < tt->num_pages; ) {
		struct page *p = tt->pages[i];
		unsigned int order, num_pages;
		struct ttm_pool_type *pt;

		order = ttm_pool_page_order(pool, p);
		num_pages = 1ULL << order;
		ttm_mem_global_free_page(&ttm_mem_glob, p,
					 num_pages * PAGE_SIZE);
		if (tt->dma_address)
			ttm_pool_unmap(pool, tt->dma_address[i], num_pages);

		pt = ttm_pool_select_type(pool, tt->caching, order);
		if (pt)
			ttm_pool_type_give(pt, tt->pages[i]);
		else
			ttm_pool_free_page(pool, tt->caching, order,
					   tt->pages[i]);

		i += num_pages;
	}

	while (atomic_long_read(&allocated_pages) > page_pool_size)
		ttm_pool_shrink();
}
EXPORT_SYMBOL(ttm_pool_free);

/**
 * ttm_pool_init - Initialize a pool
 *
 * @pool: the pool to initialize
 * @dev: device for DMA allocations and mappings
 * @use_dma_alloc: true if coherent DMA alloc should be used
 * @use_dma32: true if GFP_DMA32 should be used
 *
 * Initialize the pool and its pool types.
 */
void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
		   bool use_dma_alloc, bool use_dma32)
{
	unsigned int i, j;

	WARN_ON(!dev && use_dma_alloc);

	pool->dev = dev;
	pool->use_dma_alloc = use_dma_alloc;
	pool->use_dma32 = use_dma32;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
		for (j = 0; j < MAX_ORDER; ++j)
			ttm_pool_type_init(&pool->caching[i].orders[j],
					   pool, i, j);
}
EXPORT_SYMBOL(ttm_pool_init);

/**
 * ttm_pool_fini - Cleanup a pool
 *
 * @pool: the pool to clean up
 *
 * Free all pages in the pool and unregister the types from the global
 * shrinker.
 */
void ttm_pool_fini(struct ttm_pool *pool)
{
	unsigned int i, j;

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
		for (j = 0; j < MAX_ORDER; ++j)
			ttm_pool_type_fini(&pool->caching[i].orders[j]);
}
EXPORT_SYMBOL(ttm_pool_fini);

#ifdef CONFIG_DEBUG_FS

/* Dump information about the different pool types */
static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
				    struct seq_file *m)
{
	unsigned int i;

	for (i = 0; i < MAX_ORDER; ++i)
		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
	seq_puts(m, "\n");
}
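
/* The dump produced by ttm_pool_debugfs() below looks roughly like this
 * (one column per allocation order, the counts here are made up):
 *
 *		 --- 0---  --- 1---  --- 2--- ...
 *	wc	:       12         3         0 ...
 *	uc	:        0         1         0 ...
 *	DMA 	:        0         0         0 ...
 *	DMA wc	:        4         0         0 ...
 *	DMA uc	:        0         0         0 ...
 *
 *	total	:      123 of  4194304
 */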

/**
 * ttm_pool_debugfs - Debugfs dump function for a pool
 *
 * @pool: the pool to dump the information for
 * @m: seq_file to dump to
 *
 * Make a debugfs dump with the per pool and global information.
 */
int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m)
{
	unsigned int i;

	spin_lock(&shrinker_lock);

	seq_puts(m, "\t ");
	for (i = 0; i < MAX_ORDER; ++i)
		seq_printf(m, " ---%2u---", i);
	seq_puts(m, "\n");

	seq_puts(m, "wc\t:");
	ttm_pool_debugfs_orders(global_write_combined, m);
	seq_puts(m, "uc\t:");
	ttm_pool_debugfs_orders(global_uncached, m);

	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
		seq_puts(m, "DMA ");
		switch (i) {
		case ttm_cached:
			seq_puts(m, "\t:");
			break;
		case ttm_write_combined:
			seq_puts(m, "wc\t:");
			break;
		case ttm_uncached:
			seq_puts(m, "uc\t:");
			break;
		}
		ttm_pool_debugfs_orders(pool->caching[i].orders, m);
	}

	seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
		   atomic_long_read(&allocated_pages), page_pool_size);

	spin_unlock(&shrinker_lock);

	return 0;
}
EXPORT_SYMBOL(ttm_pool_debugfs);

#endif

/* As long as pages are available, make sure to release at least one */
static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	unsigned long num_freed = 0;

	do
		num_freed += ttm_pool_shrink();
	while (!num_freed && atomic_long_read(&allocated_pages));

	return num_freed;
}

/* Return the number of pages available or SHRINK_EMPTY if we have none */
static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	unsigned long num_pages = atomic_long_read(&allocated_pages);

	return num_pages ? num_pages : SHRINK_EMPTY;
}

/**
 * ttm_pool_mgr_init - Initialize globals
 *
 * @num_pages: default number of pages
 *
 * Initialize the global locks and lists for the MM shrinker.
 */
int ttm_pool_mgr_init(unsigned long num_pages)
{
	unsigned int i;

	if (!page_pool_size)
		page_pool_size = num_pages;

	spin_lock_init(&shrinker_lock);
	INIT_LIST_HEAD(&shrinker_list);

	for (i = 0; i < MAX_ORDER; ++i) {
		ttm_pool_type_init(&global_write_combined[i], NULL,
				   ttm_write_combined, i);
		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
	}

	mm_shrinker.count_objects = ttm_pool_shrinker_count;
	mm_shrinker.scan_objects = ttm_pool_shrinker_scan;
	mm_shrinker.seeks = 1;
	return register_shrinker(&mm_shrinker);
}

/**
 * ttm_pool_mgr_fini - Finalize globals
 *
 * Cleanup the global pools and unregister the MM shrinker.
 */
void ttm_pool_mgr_fini(void)
{
	unsigned int i;

	for (i = 0; i < MAX_ORDER; ++i) {
		ttm_pool_type_fini(&global_write_combined[i]);
		ttm_pool_type_fini(&global_uncached[i]);
	}

	unregister_shrinker(&mm_shrinker);
	WARN_ON(!list_empty(&shrinker_list));
}
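
/* A minimal sketch of how a driver typically consumes this API. The callback
 * names (my_drm_ttm_populate etc.) and the exact callback signatures are
 * hypothetical, only the ttm_pool_* calls are part of this file:
 *
 *	static int my_drm_ttm_populate(struct ttm_bo_device *bdev,
 *				       struct ttm_tt *tt,
 *				       struct ttm_operation_ctx *ctx)
 *	{
 *		// Fill tt->pages (and tt->dma_address) from the device pool.
 *		return ttm_pool_alloc(&bdev->pool, tt, ctx);
 *	}
 *
 *	static void my_drm_ttm_unpopulate(struct ttm_bo_device *bdev,
 *					  struct ttm_tt *tt)
 *	{
 *		// Give the pages back to the pool or free them.
 *		ttm_pool_free(&bdev->pool, tt);
 *	}
 *
 * The globals are set up once with ttm_pool_mgr_init() when TTM is loaded and
 * torn down again with ttm_pool_mgr_fini(); per-device pools are created with
 * ttm_pool_init() and destroyed with ttm_pool_fini().
 */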