1 /* SPDX-License-Identifier: GPL-2.0 OR MIT */ 2 /************************************************************************** 3 * 4 * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 22 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 23 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 /* 29 * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> 30 */ 31 32 #define pr_fmt(fmt) "[TTM] " fmt 33 34 #include <drm/ttm/ttm_module.h> 35 #include <drm/ttm/ttm_bo_driver.h> 36 #include <drm/ttm/ttm_placement.h> 37 #include <drm/drm_vma_manager.h> 38 #include <linux/mm.h> 39 #include <linux/pfn_t.h> 40 #include <linux/rbtree.h> 41 #include <linux/module.h> 42 #include <linux/uaccess.h> 43 #include <linux/mem_encrypt.h> 44 45 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, 46 struct vm_fault *vmf) 47 { 48 vm_fault_t ret = 0; 49 int err = 0; 50 51 if (likely(!bo->moving)) 52 goto out_unlock; 53 54 /* 55 * Quick non-stalling check for idle. 56 */ 57 if (dma_fence_is_signaled(bo->moving)) 58 goto out_clear; 59 60 /* 61 * If possible, avoid waiting for GPU with mmap_lock 62 * held. We only do this if the fault allows retry and this 63 * is the first attempt. 64 */ 65 if (fault_flag_allow_retry_first(vmf->flags)) { 66 ret = VM_FAULT_RETRY; 67 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) 68 goto out_unlock; 69 70 ttm_bo_get(bo); 71 mmap_read_unlock(vmf->vma->vm_mm); 72 (void) dma_fence_wait(bo->moving, true); 73 dma_resv_unlock(bo->base.resv); 74 ttm_bo_put(bo); 75 goto out_unlock; 76 } 77 78 /* 79 * Ordinary wait. 80 */ 81 err = dma_fence_wait(bo->moving, true); 82 if (unlikely(err != 0)) { 83 ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : 84 VM_FAULT_NOPAGE; 85 goto out_unlock; 86 } 87 88 out_clear: 89 dma_fence_put(bo->moving); 90 bo->moving = NULL; 91 92 out_unlock: 93 return ret; 94 } 95 96 static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, 97 unsigned long page_offset) 98 { 99 struct ttm_bo_device *bdev = bo->bdev; 100 101 if (bdev->driver->io_mem_pfn) 102 return bdev->driver->io_mem_pfn(bo, page_offset); 103 104 return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT) 105 + page_offset; 106 } 107 108 /** 109 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback 110 * @bo: The buffer object 111 * @vmf: The fault structure handed to the callback 112 * 113 * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped 114 * during long waits, and after the wait the callback will be restarted. This 115 * is to allow other threads using the same virtual memory space concurrent 116 * access to map(), unmap() completely unrelated buffer objects. TTM buffer 117 * object reservations sometimes wait for GPU and should therefore be 118 * considered long waits. This function reserves the buffer object interruptibly 119 * taking this into account. Starvation is avoided by the vm system not 120 * allowing too many repeated restarts. 121 * This function is intended to be used in customized fault() and _mkwrite() 122 * handlers. 123 * 124 * Return: 125 * 0 on success and the bo was reserved. 126 * VM_FAULT_RETRY if blocking wait. 127 * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. 128 */ 129 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 130 struct vm_fault *vmf) 131 { 132 /* 133 * Work around locking order reversal in fault / nopfn 134 * between mmap_lock and bo_reserve: Perform a trylock operation 135 * for reserve, and if it fails, retry the fault after waiting 136 * for the buffer to become unreserved. 137 */ 138 if (unlikely(!dma_resv_trylock(bo->base.resv))) { 139 /* 140 * If the fault allows retry and this is the first 141 * fault attempt, we try to release the mmap_lock 142 * before waiting 143 */ 144 if (fault_flag_allow_retry_first(vmf->flags)) { 145 if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { 146 ttm_bo_get(bo); 147 mmap_read_unlock(vmf->vma->vm_mm); 148 if (!dma_resv_lock_interruptible(bo->base.resv, 149 NULL)) 150 dma_resv_unlock(bo->base.resv); 151 ttm_bo_put(bo); 152 } 153 154 return VM_FAULT_RETRY; 155 } 156 157 if (dma_resv_lock_interruptible(bo->base.resv, NULL)) 158 return VM_FAULT_NOPAGE; 159 } 160 161 return 0; 162 } 163 EXPORT_SYMBOL(ttm_bo_vm_reserve); 164 165 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 166 /** 167 * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults 168 * @vmf: Fault data 169 * @bo: The buffer object 170 * @page_offset: Page offset from bo start 171 * @fault_page_size: The size of the fault in pages. 172 * @pgprot: The page protections. 173 * Does additional checking whether it's possible to insert a PUD or PMD 174 * pfn and performs the insertion. 175 * 176 * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if 177 * a huge fault was not possible, or on insertion error. 178 */ 179 static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, 180 struct ttm_buffer_object *bo, 181 pgoff_t page_offset, 182 pgoff_t fault_page_size, 183 pgprot_t pgprot) 184 { 185 pgoff_t i; 186 vm_fault_t ret; 187 unsigned long pfn; 188 pfn_t pfnt; 189 struct ttm_tt *ttm = bo->ttm; 190 bool write = vmf->flags & FAULT_FLAG_WRITE; 191 192 /* Fault should not cross bo boundary. */ 193 page_offset &= ~(fault_page_size - 1); 194 if (page_offset + fault_page_size > bo->num_pages) 195 goto out_fallback; 196 197 if (bo->mem.bus.is_iomem) 198 pfn = ttm_bo_io_mem_pfn(bo, page_offset); 199 else 200 pfn = page_to_pfn(ttm->pages[page_offset]); 201 202 /* pfn must be fault_page_size aligned. */ 203 if ((pfn & (fault_page_size - 1)) != 0) 204 goto out_fallback; 205 206 /* Check that memory is contiguous. */ 207 if (!bo->mem.bus.is_iomem) { 208 for (i = 1; i < fault_page_size; ++i) { 209 if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) 210 goto out_fallback; 211 } 212 } else if (bo->bdev->driver->io_mem_pfn) { 213 for (i = 1; i < fault_page_size; ++i) { 214 if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i) 215 goto out_fallback; 216 } 217 } 218 219 pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); 220 if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) 221 ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); 222 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 223 else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) 224 ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); 225 #endif 226 else 227 WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); 228 229 if (ret != VM_FAULT_NOPAGE) 230 goto out_fallback; 231 232 return VM_FAULT_NOPAGE; 233 out_fallback: 234 count_vm_event(THP_FAULT_FALLBACK); 235 return VM_FAULT_FALLBACK; 236 } 237 #else 238 static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, 239 struct ttm_buffer_object *bo, 240 pgoff_t page_offset, 241 pgoff_t fault_page_size, 242 pgprot_t pgprot) 243 { 244 return VM_FAULT_FALLBACK; 245 } 246 #endif 247 248 /** 249 * ttm_bo_vm_fault_reserved - TTM fault helper 250 * @vmf: The struct vm_fault given as argument to the fault callback 251 * @prot: The page protection to be used for this memory area. 252 * @num_prefault: Maximum number of prefault pages. The caller may want to 253 * specify this based on madvice settings and the size of the GPU object 254 * backed by the memory. 255 * @fault_page_size: The size of the fault in pages. 256 * 257 * This function inserts one or more page table entries pointing to the 258 * memory backing the buffer object, and then returns a return code 259 * instructing the caller to retry the page access. 260 * 261 * Return: 262 * VM_FAULT_NOPAGE on success or pending signal 263 * VM_FAULT_SIGBUS on unspecified error 264 * VM_FAULT_OOM on out-of-memory 265 * VM_FAULT_RETRY if retryable wait 266 */ 267 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, 268 pgprot_t prot, 269 pgoff_t num_prefault, 270 pgoff_t fault_page_size) 271 { 272 struct vm_area_struct *vma = vmf->vma; 273 struct ttm_buffer_object *bo = vma->vm_private_data; 274 struct ttm_bo_device *bdev = bo->bdev; 275 unsigned long page_offset; 276 unsigned long page_last; 277 unsigned long pfn; 278 struct ttm_tt *ttm = NULL; 279 struct page *page; 280 int err; 281 pgoff_t i; 282 vm_fault_t ret = VM_FAULT_NOPAGE; 283 unsigned long address = vmf->address; 284 struct ttm_resource_manager *man = 285 ttm_manager_type(bdev, bo->mem.mem_type); 286 287 /* 288 * Refuse to fault imported pages. This should be handled 289 * (if at all) by redirecting mmap to the exporter. 290 */ 291 if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) 292 return VM_FAULT_SIGBUS; 293 294 if (bdev->driver->fault_reserve_notify) { 295 struct dma_fence *moving = dma_fence_get(bo->moving); 296 297 err = bdev->driver->fault_reserve_notify(bo); 298 switch (err) { 299 case 0: 300 break; 301 case -EBUSY: 302 case -ERESTARTSYS: 303 dma_fence_put(moving); 304 return VM_FAULT_NOPAGE; 305 default: 306 dma_fence_put(moving); 307 return VM_FAULT_SIGBUS; 308 } 309 310 if (bo->moving != moving) { 311 ttm_bo_move_to_lru_tail_unlocked(bo); 312 } 313 dma_fence_put(moving); 314 } 315 316 /* 317 * Wait for buffer data in transit, due to a pipelined 318 * move. 319 */ 320 ret = ttm_bo_vm_fault_idle(bo, vmf); 321 if (unlikely(ret != 0)) 322 return ret; 323 324 err = ttm_mem_io_lock(man, true); 325 if (unlikely(err != 0)) 326 return VM_FAULT_NOPAGE; 327 err = ttm_mem_io_reserve_vm(bo); 328 if (unlikely(err != 0)) { 329 ret = VM_FAULT_SIGBUS; 330 goto out_io_unlock; 331 } 332 333 page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + 334 vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node); 335 page_last = vma_pages(vma) + vma->vm_pgoff - 336 drm_vma_node_start(&bo->base.vma_node); 337 338 if (unlikely(page_offset >= bo->num_pages)) { 339 ret = VM_FAULT_SIGBUS; 340 goto out_io_unlock; 341 } 342 343 prot = ttm_io_prot(bo->mem.placement, prot); 344 if (!bo->mem.bus.is_iomem) { 345 struct ttm_operation_ctx ctx = { 346 .interruptible = false, 347 .no_wait_gpu = false, 348 .flags = TTM_OPT_FLAG_FORCE_ALLOC 349 350 }; 351 352 ttm = bo->ttm; 353 if (ttm_tt_populate(bo->ttm, &ctx)) { 354 ret = VM_FAULT_OOM; 355 goto out_io_unlock; 356 } 357 } else { 358 /* Iomem should not be marked encrypted */ 359 prot = pgprot_decrypted(prot); 360 } 361 362 /* We don't prefault on huge faults. Yet. */ 363 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) { 364 ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset, 365 fault_page_size, prot); 366 goto out_io_unlock; 367 } 368 369 /* 370 * Speculatively prefault a number of pages. Only error on 371 * first page. 372 */ 373 for (i = 0; i < num_prefault; ++i) { 374 if (bo->mem.bus.is_iomem) { 375 pfn = ttm_bo_io_mem_pfn(bo, page_offset); 376 } else { 377 page = ttm->pages[page_offset]; 378 if (unlikely(!page && i == 0)) { 379 ret = VM_FAULT_OOM; 380 goto out_io_unlock; 381 } else if (unlikely(!page)) { 382 break; 383 } 384 page->index = drm_vma_node_start(&bo->base.vma_node) + 385 page_offset; 386 pfn = page_to_pfn(page); 387 } 388 389 /* 390 * Note that the value of @prot at this point may differ from 391 * the value of @vma->vm_page_prot in the caching- and 392 * encryption bits. This is because the exact location of the 393 * data may not be known at mmap() time and may also change 394 * at arbitrary times while the data is mmap'ed. 395 * See vmf_insert_mixed_prot() for a discussion. 396 */ 397 if (vma->vm_flags & VM_MIXEDMAP) 398 ret = vmf_insert_mixed_prot(vma, address, 399 __pfn_to_pfn_t(pfn, PFN_DEV), 400 prot); 401 else 402 ret = vmf_insert_pfn_prot(vma, address, pfn, prot); 403 404 /* Never error on prefaulted PTEs */ 405 if (unlikely((ret & VM_FAULT_ERROR))) { 406 if (i == 0) 407 goto out_io_unlock; 408 else 409 break; 410 } 411 412 address += PAGE_SIZE; 413 if (unlikely(++page_offset >= page_last)) 414 break; 415 } 416 ret = VM_FAULT_NOPAGE; 417 out_io_unlock: 418 ttm_mem_io_unlock(man); 419 return ret; 420 } 421 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); 422 423 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) 424 { 425 struct vm_area_struct *vma = vmf->vma; 426 pgprot_t prot; 427 struct ttm_buffer_object *bo = vma->vm_private_data; 428 vm_fault_t ret; 429 430 ret = ttm_bo_vm_reserve(bo, vmf); 431 if (ret) 432 return ret; 433 434 prot = vma->vm_page_prot; 435 ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); 436 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 437 return ret; 438 439 dma_resv_unlock(bo->base.resv); 440 441 return ret; 442 } 443 EXPORT_SYMBOL(ttm_bo_vm_fault); 444 445 void ttm_bo_vm_open(struct vm_area_struct *vma) 446 { 447 struct ttm_buffer_object *bo = vma->vm_private_data; 448 449 WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); 450 451 ttm_bo_get(bo); 452 } 453 EXPORT_SYMBOL(ttm_bo_vm_open); 454 455 void ttm_bo_vm_close(struct vm_area_struct *vma) 456 { 457 struct ttm_buffer_object *bo = vma->vm_private_data; 458 459 ttm_bo_put(bo); 460 vma->vm_private_data = NULL; 461 } 462 EXPORT_SYMBOL(ttm_bo_vm_close); 463 464 static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, 465 unsigned long offset, 466 uint8_t *buf, int len, int write) 467 { 468 unsigned long page = offset >> PAGE_SHIFT; 469 unsigned long bytes_left = len; 470 int ret; 471 472 /* Copy a page at a time, that way no extra virtual address 473 * mapping is needed 474 */ 475 offset -= page << PAGE_SHIFT; 476 do { 477 unsigned long bytes = min(bytes_left, PAGE_SIZE - offset); 478 struct ttm_bo_kmap_obj map; 479 void *ptr; 480 bool is_iomem; 481 482 ret = ttm_bo_kmap(bo, page, 1, &map); 483 if (ret) 484 return ret; 485 486 ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset; 487 WARN_ON_ONCE(is_iomem); 488 if (write) 489 memcpy(ptr, buf, bytes); 490 else 491 memcpy(buf, ptr, bytes); 492 ttm_bo_kunmap(&map); 493 494 page++; 495 buf += bytes; 496 bytes_left -= bytes; 497 offset = 0; 498 } while (bytes_left); 499 500 return len; 501 } 502 503 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, 504 void *buf, int len, int write) 505 { 506 struct ttm_buffer_object *bo = vma->vm_private_data; 507 unsigned long offset = (addr) - vma->vm_start + 508 ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) 509 << PAGE_SHIFT); 510 int ret; 511 512 if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages) 513 return -EIO; 514 515 ret = ttm_bo_reserve(bo, true, false, NULL); 516 if (ret) 517 return ret; 518 519 switch (bo->mem.mem_type) { 520 case TTM_PL_SYSTEM: 521 if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { 522 ret = ttm_tt_swapin(bo->ttm); 523 if (unlikely(ret != 0)) 524 return ret; 525 } 526 fallthrough; 527 case TTM_PL_TT: 528 ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write); 529 break; 530 default: 531 if (bo->bdev->driver->access_memory) 532 ret = bo->bdev->driver->access_memory( 533 bo, offset, buf, len, write); 534 else 535 ret = -EIO; 536 } 537 538 ttm_bo_unreserve(bo); 539 540 return ret; 541 } 542 EXPORT_SYMBOL(ttm_bo_vm_access); 543 544 static const struct vm_operations_struct ttm_bo_vm_ops = { 545 .fault = ttm_bo_vm_fault, 546 .open = ttm_bo_vm_open, 547 .close = ttm_bo_vm_close, 548 .access = ttm_bo_vm_access, 549 }; 550 551 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev, 552 unsigned long offset, 553 unsigned long pages) 554 { 555 struct drm_vma_offset_node *node; 556 struct ttm_buffer_object *bo = NULL; 557 558 drm_vma_offset_lock_lookup(bdev->vma_manager); 559 560 node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, pages); 561 if (likely(node)) { 562 bo = container_of(node, struct ttm_buffer_object, 563 base.vma_node); 564 bo = ttm_bo_get_unless_zero(bo); 565 } 566 567 drm_vma_offset_unlock_lookup(bdev->vma_manager); 568 569 if (!bo) 570 pr_err("Could not find buffer object to map\n"); 571 572 return bo; 573 } 574 575 static void ttm_bo_mmap_vma_setup(struct ttm_buffer_object *bo, struct vm_area_struct *vma) 576 { 577 vma->vm_ops = &ttm_bo_vm_ops; 578 579 /* 580 * Note: We're transferring the bo reference to 581 * vma->vm_private_data here. 582 */ 583 584 vma->vm_private_data = bo; 585 586 /* 587 * We'd like to use VM_PFNMAP on shared mappings, where 588 * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, 589 * but for some reason VM_PFNMAP + x86 PAT + write-combine is very 590 * bad for performance. Until that has been sorted out, use 591 * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719 592 */ 593 vma->vm_flags |= VM_MIXEDMAP; 594 vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; 595 } 596 597 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, 598 struct ttm_bo_device *bdev) 599 { 600 struct ttm_bo_driver *driver; 601 struct ttm_buffer_object *bo; 602 int ret; 603 604 if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START)) 605 return -EINVAL; 606 607 bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma)); 608 if (unlikely(!bo)) 609 return -EINVAL; 610 611 driver = bo->bdev->driver; 612 if (unlikely(!driver->verify_access)) { 613 ret = -EPERM; 614 goto out_unref; 615 } 616 ret = driver->verify_access(bo, filp); 617 if (unlikely(ret != 0)) 618 goto out_unref; 619 620 ttm_bo_mmap_vma_setup(bo, vma); 621 return 0; 622 out_unref: 623 ttm_bo_put(bo); 624 return ret; 625 } 626 EXPORT_SYMBOL(ttm_bo_mmap); 627 628 int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) 629 { 630 ttm_bo_get(bo); 631 ttm_bo_mmap_vma_setup(bo, vma); 632 return 0; 633 } 634 EXPORT_SYMBOL(ttm_bo_mmap_obj); 635