1 /* SPDX-License-Identifier: GPL-2.0 OR MIT */ 2 /************************************************************************** 3 * 4 * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 22 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 23 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 24 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 25 * USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 /* 29 * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> 30 */ 31 32 #define pr_fmt(fmt) "[TTM] " fmt 33 34 #include <drm/ttm/ttm_bo_driver.h> 35 #include <drm/ttm/ttm_placement.h> 36 #include <drm/drm_vma_manager.h> 37 #include <linux/mm.h> 38 #include <linux/pfn_t.h> 39 #include <linux/rbtree.h> 40 #include <linux/module.h> 41 #include <linux/uaccess.h> 42 #include <linux/mem_encrypt.h> 43 44 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, 45 struct vm_fault *vmf) 46 { 47 vm_fault_t ret = 0; 48 int err = 0; 49 50 if (likely(!bo->moving)) 51 goto out_unlock; 52 53 /* 54 * Quick non-stalling check for idle. 55 */ 56 if (dma_fence_is_signaled(bo->moving)) 57 goto out_clear; 58 59 /* 60 * If possible, avoid waiting for GPU with mmap_lock 61 * held. We only do this if the fault allows retry and this 62 * is the first attempt. 63 */ 64 if (fault_flag_allow_retry_first(vmf->flags)) { 65 ret = VM_FAULT_RETRY; 66 if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) 67 goto out_unlock; 68 69 ttm_bo_get(bo); 70 mmap_read_unlock(vmf->vma->vm_mm); 71 (void) dma_fence_wait(bo->moving, true); 72 dma_resv_unlock(bo->base.resv); 73 ttm_bo_put(bo); 74 goto out_unlock; 75 } 76 77 /* 78 * Ordinary wait. 79 */ 80 err = dma_fence_wait(bo->moving, true); 81 if (unlikely(err != 0)) { 82 ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : 83 VM_FAULT_NOPAGE; 84 goto out_unlock; 85 } 86 87 out_clear: 88 dma_fence_put(bo->moving); 89 bo->moving = NULL; 90 91 out_unlock: 92 return ret; 93 } 94 95 static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, 96 unsigned long page_offset) 97 { 98 struct ttm_bo_device *bdev = bo->bdev; 99 100 if (bdev->driver->io_mem_pfn) 101 return bdev->driver->io_mem_pfn(bo, page_offset); 102 103 return (bo->mem.bus.offset >> PAGE_SHIFT) + page_offset; 104 } 105 106 /** 107 * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback 108 * @bo: The buffer object 109 * @vmf: The fault structure handed to the callback 110 * 111 * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped 112 * during long waits, and after the wait the callback will be restarted. This 113 * is to allow other threads using the same virtual memory space concurrent 114 * access to map(), unmap() completely unrelated buffer objects. TTM buffer 115 * object reservations sometimes wait for GPU and should therefore be 116 * considered long waits. This function reserves the buffer object interruptibly 117 * taking this into account. Starvation is avoided by the vm system not 118 * allowing too many repeated restarts. 119 * This function is intended to be used in customized fault() and _mkwrite() 120 * handlers. 121 * 122 * Return: 123 * 0 on success and the bo was reserved. 124 * VM_FAULT_RETRY if blocking wait. 125 * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. 126 */ 127 vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, 128 struct vm_fault *vmf) 129 { 130 /* 131 * Work around locking order reversal in fault / nopfn 132 * between mmap_lock and bo_reserve: Perform a trylock operation 133 * for reserve, and if it fails, retry the fault after waiting 134 * for the buffer to become unreserved. 135 */ 136 if (unlikely(!dma_resv_trylock(bo->base.resv))) { 137 /* 138 * If the fault allows retry and this is the first 139 * fault attempt, we try to release the mmap_lock 140 * before waiting 141 */ 142 if (fault_flag_allow_retry_first(vmf->flags)) { 143 if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { 144 ttm_bo_get(bo); 145 mmap_read_unlock(vmf->vma->vm_mm); 146 if (!dma_resv_lock_interruptible(bo->base.resv, 147 NULL)) 148 dma_resv_unlock(bo->base.resv); 149 ttm_bo_put(bo); 150 } 151 152 return VM_FAULT_RETRY; 153 } 154 155 if (dma_resv_lock_interruptible(bo->base.resv, NULL)) 156 return VM_FAULT_NOPAGE; 157 } 158 159 /* 160 * Refuse to fault imported pages. This should be handled 161 * (if at all) by redirecting mmap to the exporter. 162 */ 163 if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { 164 dma_resv_unlock(bo->base.resv); 165 return VM_FAULT_SIGBUS; 166 } 167 168 return 0; 169 } 170 EXPORT_SYMBOL(ttm_bo_vm_reserve); 171 172 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 173 /** 174 * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults 175 * @vmf: Fault data 176 * @bo: The buffer object 177 * @page_offset: Page offset from bo start 178 * @fault_page_size: The size of the fault in pages. 179 * @pgprot: The page protections. 180 * Does additional checking whether it's possible to insert a PUD or PMD 181 * pfn and performs the insertion. 182 * 183 * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if 184 * a huge fault was not possible, or on insertion error. 185 */ 186 static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, 187 struct ttm_buffer_object *bo, 188 pgoff_t page_offset, 189 pgoff_t fault_page_size, 190 pgprot_t pgprot) 191 { 192 pgoff_t i; 193 vm_fault_t ret; 194 unsigned long pfn; 195 pfn_t pfnt; 196 struct ttm_tt *ttm = bo->ttm; 197 bool write = vmf->flags & FAULT_FLAG_WRITE; 198 199 /* Fault should not cross bo boundary. */ 200 page_offset &= ~(fault_page_size - 1); 201 if (page_offset + fault_page_size > bo->mem.num_pages) 202 goto out_fallback; 203 204 if (bo->mem.bus.is_iomem) 205 pfn = ttm_bo_io_mem_pfn(bo, page_offset); 206 else 207 pfn = page_to_pfn(ttm->pages[page_offset]); 208 209 /* pfn must be fault_page_size aligned. */ 210 if ((pfn & (fault_page_size - 1)) != 0) 211 goto out_fallback; 212 213 /* Check that memory is contiguous. */ 214 if (!bo->mem.bus.is_iomem) { 215 for (i = 1; i < fault_page_size; ++i) { 216 if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) 217 goto out_fallback; 218 } 219 } else if (bo->bdev->driver->io_mem_pfn) { 220 for (i = 1; i < fault_page_size; ++i) { 221 if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i) 222 goto out_fallback; 223 } 224 } 225 226 pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); 227 if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) 228 ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); 229 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD 230 else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) 231 ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); 232 #endif 233 else 234 WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); 235 236 if (ret != VM_FAULT_NOPAGE) 237 goto out_fallback; 238 239 return VM_FAULT_NOPAGE; 240 out_fallback: 241 count_vm_event(THP_FAULT_FALLBACK); 242 return VM_FAULT_FALLBACK; 243 } 244 #else 245 static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, 246 struct ttm_buffer_object *bo, 247 pgoff_t page_offset, 248 pgoff_t fault_page_size, 249 pgprot_t pgprot) 250 { 251 return VM_FAULT_FALLBACK; 252 } 253 #endif 254 255 /** 256 * ttm_bo_vm_fault_reserved - TTM fault helper 257 * @vmf: The struct vm_fault given as argument to the fault callback 258 * @prot: The page protection to be used for this memory area. 259 * @num_prefault: Maximum number of prefault pages. The caller may want to 260 * specify this based on madvice settings and the size of the GPU object 261 * backed by the memory. 262 * @fault_page_size: The size of the fault in pages. 263 * 264 * This function inserts one or more page table entries pointing to the 265 * memory backing the buffer object, and then returns a return code 266 * instructing the caller to retry the page access. 267 * 268 * Return: 269 * VM_FAULT_NOPAGE on success or pending signal 270 * VM_FAULT_SIGBUS on unspecified error 271 * VM_FAULT_OOM on out-of-memory 272 * VM_FAULT_RETRY if retryable wait 273 */ 274 vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, 275 pgprot_t prot, 276 pgoff_t num_prefault, 277 pgoff_t fault_page_size) 278 { 279 struct vm_area_struct *vma = vmf->vma; 280 struct ttm_buffer_object *bo = vma->vm_private_data; 281 struct ttm_bo_device *bdev = bo->bdev; 282 unsigned long page_offset; 283 unsigned long page_last; 284 unsigned long pfn; 285 struct ttm_tt *ttm = NULL; 286 struct page *page; 287 int err; 288 pgoff_t i; 289 vm_fault_t ret = VM_FAULT_NOPAGE; 290 unsigned long address = vmf->address; 291 292 /* 293 * Wait for buffer data in transit, due to a pipelined 294 * move. 295 */ 296 ret = ttm_bo_vm_fault_idle(bo, vmf); 297 if (unlikely(ret != 0)) 298 return ret; 299 300 err = ttm_mem_io_reserve(bdev, &bo->mem); 301 if (unlikely(err != 0)) 302 return VM_FAULT_SIGBUS; 303 304 page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + 305 vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node); 306 page_last = vma_pages(vma) + vma->vm_pgoff - 307 drm_vma_node_start(&bo->base.vma_node); 308 309 if (unlikely(page_offset >= bo->mem.num_pages)) 310 return VM_FAULT_SIGBUS; 311 312 prot = ttm_io_prot(bo, &bo->mem, prot); 313 if (!bo->mem.bus.is_iomem) { 314 struct ttm_operation_ctx ctx = { 315 .interruptible = false, 316 .no_wait_gpu = false, 317 .force_alloc = true 318 }; 319 320 ttm = bo->ttm; 321 if (ttm_tt_populate(bdev, bo->ttm, &ctx)) 322 return VM_FAULT_OOM; 323 } else { 324 /* Iomem should not be marked encrypted */ 325 prot = pgprot_decrypted(prot); 326 } 327 328 /* We don't prefault on huge faults. Yet. */ 329 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) 330 return ttm_bo_vm_insert_huge(vmf, bo, page_offset, 331 fault_page_size, prot); 332 333 /* 334 * Speculatively prefault a number of pages. Only error on 335 * first page. 336 */ 337 for (i = 0; i < num_prefault; ++i) { 338 if (bo->mem.bus.is_iomem) { 339 pfn = ttm_bo_io_mem_pfn(bo, page_offset); 340 } else { 341 page = ttm->pages[page_offset]; 342 if (unlikely(!page && i == 0)) { 343 return VM_FAULT_OOM; 344 } else if (unlikely(!page)) { 345 break; 346 } 347 page->index = drm_vma_node_start(&bo->base.vma_node) + 348 page_offset; 349 pfn = page_to_pfn(page); 350 } 351 352 /* 353 * Note that the value of @prot at this point may differ from 354 * the value of @vma->vm_page_prot in the caching- and 355 * encryption bits. This is because the exact location of the 356 * data may not be known at mmap() time and may also change 357 * at arbitrary times while the data is mmap'ed. 358 * See vmf_insert_mixed_prot() for a discussion. 359 */ 360 if (vma->vm_flags & VM_MIXEDMAP) 361 ret = vmf_insert_mixed_prot(vma, address, 362 __pfn_to_pfn_t(pfn, PFN_DEV), 363 prot); 364 else 365 ret = vmf_insert_pfn_prot(vma, address, pfn, prot); 366 367 /* Never error on prefaulted PTEs */ 368 if (unlikely((ret & VM_FAULT_ERROR))) { 369 if (i == 0) 370 return VM_FAULT_NOPAGE; 371 else 372 break; 373 } 374 375 address += PAGE_SIZE; 376 if (unlikely(++page_offset >= page_last)) 377 break; 378 } 379 return ret; 380 } 381 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); 382 383 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) 384 { 385 struct vm_area_struct *vma = vmf->vma; 386 pgprot_t prot; 387 struct ttm_buffer_object *bo = vma->vm_private_data; 388 vm_fault_t ret; 389 390 ret = ttm_bo_vm_reserve(bo, vmf); 391 if (ret) 392 return ret; 393 394 prot = vma->vm_page_prot; 395 ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); 396 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) 397 return ret; 398 399 dma_resv_unlock(bo->base.resv); 400 401 return ret; 402 } 403 EXPORT_SYMBOL(ttm_bo_vm_fault); 404 405 void ttm_bo_vm_open(struct vm_area_struct *vma) 406 { 407 struct ttm_buffer_object *bo = vma->vm_private_data; 408 409 WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); 410 411 ttm_bo_get(bo); 412 } 413 EXPORT_SYMBOL(ttm_bo_vm_open); 414 415 void ttm_bo_vm_close(struct vm_area_struct *vma) 416 { 417 struct ttm_buffer_object *bo = vma->vm_private_data; 418 419 ttm_bo_put(bo); 420 vma->vm_private_data = NULL; 421 } 422 EXPORT_SYMBOL(ttm_bo_vm_close); 423 424 static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, 425 unsigned long offset, 426 uint8_t *buf, int len, int write) 427 { 428 unsigned long page = offset >> PAGE_SHIFT; 429 unsigned long bytes_left = len; 430 int ret; 431 432 /* Copy a page at a time, that way no extra virtual address 433 * mapping is needed 434 */ 435 offset -= page << PAGE_SHIFT; 436 do { 437 unsigned long bytes = min(bytes_left, PAGE_SIZE - offset); 438 struct ttm_bo_kmap_obj map; 439 void *ptr; 440 bool is_iomem; 441 442 ret = ttm_bo_kmap(bo, page, 1, &map); 443 if (ret) 444 return ret; 445 446 ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset; 447 WARN_ON_ONCE(is_iomem); 448 if (write) 449 memcpy(ptr, buf, bytes); 450 else 451 memcpy(buf, ptr, bytes); 452 ttm_bo_kunmap(&map); 453 454 page++; 455 buf += bytes; 456 bytes_left -= bytes; 457 offset = 0; 458 } while (bytes_left); 459 460 return len; 461 } 462 463 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, 464 void *buf, int len, int write) 465 { 466 struct ttm_buffer_object *bo = vma->vm_private_data; 467 unsigned long offset = (addr) - vma->vm_start + 468 ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) 469 << PAGE_SHIFT); 470 int ret; 471 472 if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->mem.num_pages) 473 return -EIO; 474 475 ret = ttm_bo_reserve(bo, true, false, NULL); 476 if (ret) 477 return ret; 478 479 switch (bo->mem.mem_type) { 480 case TTM_PL_SYSTEM: 481 if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { 482 ret = ttm_tt_swapin(bo->ttm); 483 if (unlikely(ret != 0)) 484 return ret; 485 } 486 fallthrough; 487 case TTM_PL_TT: 488 ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write); 489 break; 490 default: 491 if (bo->bdev->driver->access_memory) 492 ret = bo->bdev->driver->access_memory( 493 bo, offset, buf, len, write); 494 else 495 ret = -EIO; 496 } 497 498 ttm_bo_unreserve(bo); 499 500 return ret; 501 } 502 EXPORT_SYMBOL(ttm_bo_vm_access); 503 504 static const struct vm_operations_struct ttm_bo_vm_ops = { 505 .fault = ttm_bo_vm_fault, 506 .open = ttm_bo_vm_open, 507 .close = ttm_bo_vm_close, 508 .access = ttm_bo_vm_access, 509 }; 510 511 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev, 512 unsigned long offset, 513 unsigned long pages) 514 { 515 struct drm_vma_offset_node *node; 516 struct ttm_buffer_object *bo = NULL; 517 518 drm_vma_offset_lock_lookup(bdev->vma_manager); 519 520 node = drm_vma_offset_lookup_locked(bdev->vma_manager, offset, pages); 521 if (likely(node)) { 522 bo = container_of(node, struct ttm_buffer_object, 523 base.vma_node); 524 bo = ttm_bo_get_unless_zero(bo); 525 } 526 527 drm_vma_offset_unlock_lookup(bdev->vma_manager); 528 529 if (!bo) 530 pr_err("Could not find buffer object to map\n"); 531 532 return bo; 533 } 534 535 static void ttm_bo_mmap_vma_setup(struct ttm_buffer_object *bo, struct vm_area_struct *vma) 536 { 537 vma->vm_ops = &ttm_bo_vm_ops; 538 539 /* 540 * Note: We're transferring the bo reference to 541 * vma->vm_private_data here. 542 */ 543 544 vma->vm_private_data = bo; 545 546 /* 547 * We'd like to use VM_PFNMAP on shared mappings, where 548 * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, 549 * but for some reason VM_PFNMAP + x86 PAT + write-combine is very 550 * bad for performance. Until that has been sorted out, use 551 * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719 552 */ 553 vma->vm_flags |= VM_MIXEDMAP; 554 vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; 555 } 556 557 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, 558 struct ttm_bo_device *bdev) 559 { 560 struct ttm_bo_driver *driver; 561 struct ttm_buffer_object *bo; 562 int ret; 563 564 if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START)) 565 return -EINVAL; 566 567 bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma)); 568 if (unlikely(!bo)) 569 return -EINVAL; 570 571 driver = bo->bdev->driver; 572 if (unlikely(!driver->verify_access)) { 573 ret = -EPERM; 574 goto out_unref; 575 } 576 ret = driver->verify_access(bo, filp); 577 if (unlikely(ret != 0)) 578 goto out_unref; 579 580 ttm_bo_mmap_vma_setup(bo, vma); 581 return 0; 582 out_unref: 583 ttm_bo_put(bo); 584 return ret; 585 } 586 EXPORT_SYMBOL(ttm_bo_mmap); 587 588 int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) 589 { 590 ttm_bo_get(bo); 591 ttm_bo_mmap_vma_setup(bo, vma); 592 return 0; 593 } 594 EXPORT_SYMBOL(ttm_bo_mmap_obj); 595