/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/* Special value indicating that no flush is necessary */
#define AMDGPU_VM_NO_FLUSH (~0ll)

/**
 * amdgpu_vm_num_pdes - return the number of page directory entries
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the number of page directory entries.
 */
static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
{
        return adev->vm_manager.max_pfn >> amdgpu_vm_block_size;
}

/**
 * amdgpu_vm_directory_size - returns the size of the page directory in bytes
 *
 * @adev: amdgpu_device pointer
 *
 * Calculate the size of the page directory in bytes.
 */
static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
{
        return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8);
}
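/*
 * Worked example (illustrative values only, not defaults): with
 * amdgpu_vm_block_size = 9 and max_pfn = 0x40000 (a 1GB address space of
 * 4KB GPU pages), amdgpu_vm_num_pdes() returns 0x40000 >> 9 = 512 page
 * directory entries and amdgpu_vm_directory_size() returns 512 * 8 = 4KB.
 * Each page table then covers 1 << 9 = 512 PTEs, i.e. 2MB of GPU address
 * space.
 */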
/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
                         struct list_head *validated,
                         struct amdgpu_bo_list_entry *entry)
{
        entry->robj = vm->page_directory;
        entry->priority = 0;
        entry->tv.bo = &vm->page_directory->tbo;
        entry->tv.shared = true;
        entry->user_pages = NULL;
        list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_get_pt_bos - add the vm page table BOs to a duplicates list
 *
 * @vm: vm providing the BOs
 * @duplicates: head of duplicates list
 *
 * Add the page tables to the BO duplicates list
 * for command submission.
 */
void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
{
        unsigned i;

        /* add the vm page table to the list */
        for (i = 0; i <= vm->max_pde_used; ++i) {
                struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

                if (!entry->robj)
                        continue;

                list_add(&entry->tv.head, duplicates);
        }
}

/**
 * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail
 *
 * @adev: amdgpu device instance
 * @vm: vm providing the BOs
 *
 * Move the PT BOs to the tail of the LRU.
 */
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm)
{
        struct ttm_bo_global *glob = adev->mman.bdev.glob;
        unsigned i;

        spin_lock(&glob->lru_lock);
        for (i = 0; i <= vm->max_pde_used; ++i) {
                struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;

                if (!entry->robj)
                        continue;

                ttm_bo_move_to_lru_tail(&entry->robj->tbo);
        }
        spin_unlock(&glob->lru_lock);
}

/**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @vm_id: resulting VMID
 * @vm_pd_addr: resulting page directory address, or AMDGPU_VM_NO_FLUSH
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 */
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
                      struct amdgpu_sync *sync, struct fence *fence,
                      unsigned *vm_id, uint64_t *vm_pd_addr)
{
        uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vm_id *id = &vm->ids[ring->idx];
        struct fence *updates = sync->last_vm_update;
        int r;

        mutex_lock(&adev->vm_manager.lock);

        /* check if the id is still valid */
        if (id->mgr_id) {
                struct fence *flushed = id->flushed_updates;
                bool is_later;
                long owner;

                if (!flushed)
                        is_later = true;
                else if (!updates)
                        is_later = false;
                else
                        is_later = fence_is_later(updates, flushed);

                owner = atomic_long_read(&id->mgr_id->owner);
                if (!is_later && owner == (long)id &&
                    pd_addr == id->pd_gpu_addr) {

                        r = amdgpu_sync_fence(ring->adev, sync,
                                              id->mgr_id->active);
                        if (r) {
                                mutex_unlock(&adev->vm_manager.lock);
                                return r;
                        }

                        fence_put(id->mgr_id->active);
                        id->mgr_id->active = fence_get(fence);

                        list_move_tail(&id->mgr_id->list,
                                       &adev->vm_manager.ids_lru);

                        *vm_id = id->mgr_id - adev->vm_manager.ids;
                        *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
                        trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
                                                *vm_pd_addr);

                        mutex_unlock(&adev->vm_manager.lock);
                        return 0;
                }
        }

        id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
                                      struct amdgpu_vm_manager_id,
                                      list);

        r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
        if (!r) {
                fence_put(id->mgr_id->active);
                id->mgr_id->active = fence_get(fence);

                fence_put(id->flushed_updates);
                id->flushed_updates = fence_get(updates);

                id->pd_gpu_addr = pd_addr;

                list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
                atomic_long_set(&id->mgr_id->owner, (long)id);

                *vm_id = id->mgr_id - adev->vm_manager.ids;
                *vm_pd_addr = pd_addr;
                trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
        }

        mutex_unlock(&adev->vm_manager.lock);
        return r;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @vm_id: vmid number to use
 * @pd_addr: address of the page directory
 * @gds_base: GDS base address for this VMID
 * @gds_size: GDS size for this VMID
 * @gws_base: GWS base for this VMID
 * @gws_size: GWS size for this VMID
 * @oa_base: OA base for this VMID
 * @oa_size: OA size for this VMID
 *
 * Emit a VM flush when it is necessary.
 */
void amdgpu_vm_flush(struct amdgpu_ring *ring,
                     unsigned vm_id, uint64_t pd_addr,
                     uint32_t gds_base, uint32_t gds_size,
                     uint32_t gws_base, uint32_t gws_size,
                     uint32_t oa_base, uint32_t oa_size)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
        bool gds_switch_needed = ring->funcs->emit_gds_switch && (
                mgr_id->gds_base != gds_base ||
                mgr_id->gds_size != gds_size ||
                mgr_id->gws_base != gws_base ||
                mgr_id->gws_size != gws_size ||
                mgr_id->oa_base != oa_base ||
                mgr_id->oa_size != oa_size);

        if (ring->funcs->emit_pipeline_sync && (
            pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
                amdgpu_ring_emit_pipeline_sync(ring);

        if (pd_addr != AMDGPU_VM_NO_FLUSH) {
                trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
                amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
        }

        if (gds_switch_needed) {
                mgr_id->gds_base = gds_base;
                mgr_id->gds_size = gds_size;
                mgr_id->gws_base = gws_base;
                mgr_id->gws_size = gws_size;
                mgr_id->oa_base = oa_base;
                mgr_id->oa_size = oa_size;
                amdgpu_ring_emit_gds_switch(ring, vm_id,
                                            gds_base, gds_size,
                                            gws_base, gws_size,
                                            oa_base, oa_size);
        }
}

/**
 * amdgpu_vm_reset_id - reset VMID to zero
 *
 * @adev: amdgpu device structure
 * @vm_id: vmid number to use
 *
 * Reset saved GDS, GWS and OA to force switch on next flush.
 */
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
{
        struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];

        mgr_id->gds_base = 0;
        mgr_id->gds_size = 0;
        mgr_id->gws_base = 0;
        mgr_id->gws_size = 0;
        mgr_id->oa_base = 0;
        mgr_id->oa_size = 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
                                       struct amdgpu_bo *bo)
{
        struct amdgpu_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                if (bo_va->vm == vm) {
                        return bo_va;
                }
        }
        return NULL;
}

/**
 * amdgpu_vm_update_pages - helper to call the right asic function
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw access flags
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
                                   struct amdgpu_gart *gtt,
                                   uint32_t gtt_flags,
                                   struct amdgpu_ib *ib,
                                   uint64_t pe, uint64_t addr,
                                   unsigned count, uint32_t incr,
                                   uint32_t flags)
{
        trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);

        if ((gtt == &adev->gart) && (flags == gtt_flags)) {
                /* GART pages with matching flags: copy the PTEs
                 * directly out of the GART table */
                uint64_t src = gtt->table_addr + (addr >> 12) * 8;
                amdgpu_vm_copy_pte(adev, ib, pe, src, count);

        } else if (gtt) {
                /* other system pages: look up the DMA addresses
                 * and write the PTEs individually */
                dma_addr_t *pages_addr = gtt->pages_addr;
                amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
                                    count, incr, flags);

        } else if (count < 3) {
                /* only a couple of entries: write them directly */
                amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
                                    count, incr, flags);

        } else {
                /* linear range: let the engine generate the PTEs */
                amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
                                      count, incr, flags);
        }
}

/**
 * amdgpu_vm_clear_bo - initially clear the page dir/table
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: bo to clear
 *
 * The BO has to be reserved before calling this.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                              struct amdgpu_vm *vm,
                              struct amdgpu_bo *bo)
{
        struct amdgpu_ring *ring;
        struct fence *fence = NULL;
        struct amdgpu_job *job;
        unsigned entries;
        uint64_t addr;
        int r;

        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

        r = reservation_object_reserve_shared(bo->tbo.resv);
        if (r)
                return r;

        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
        if (r)
                goto error;

        addr = amdgpu_bo_gpu_offset(bo);
        entries = amdgpu_bo_size(bo) / 8;

        r = amdgpu_job_alloc_with_ib(adev, 64, &job);
        if (r)
                goto error;

        amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
                               0, 0);
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);

        WARN_ON(job->ibs[0].length_dw > 64);
        r = amdgpu_job_submit(job, ring, &vm->entity,
                              AMDGPU_FENCE_OWNER_VM, &fence);
        if (r)
                goto error_free;

        amdgpu_bo_fence(bo, fence, true);
        fence_put(fence);
        return 0;

error_free:
        amdgpu_job_free(job);

error:
        return r;
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to and return the pointer for the page table entry.
 */
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
        uint64_t result;

        if (pages_addr) {
                /* page table offset */
                result = pages_addr[addr >> PAGE_SHIFT];

                /* in case cpu page size != gpu page size */
                result |= addr & (~PAGE_MASK);

        } else {
                /* No mapping required */
                result = addr;
        }

        result &= 0xFFFFFFFFFFFFF000ULL;

        return result;
}
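/*
 * Illustrative note on the masking in amdgpu_vm_map_gart() above: with 4KB
 * CPU pages the OR of (addr & ~PAGE_MASK) is cleared again by the final 4KB
 * mask, so only the DMA address of the page remains. With larger CPU pages,
 * e.g. 64KB, the OR keeps bits 15:12 of addr so that the correct 4KB GPU
 * page inside the CPU page is selected before the low 12 bits are masked off.
 */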
/**
 * amdgpu_vm_update_page_directory - make sure that the page directory is valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory.
 * Returns 0 for success, error for failure.
 */
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm)
{
        struct amdgpu_ring *ring;
        struct amdgpu_bo *pd = vm->page_directory;
        uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
        uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct fence *fence = NULL;

        int r;

        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

        /* padding, etc. */
        ndw = 64;

        /* assume the worst case */
        ndw += vm->max_pde_used * 6;

        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
        if (r)
                return r;

        ib = &job->ibs[0];

        /* walk over the address space and update the page directory */
        for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
                struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
                uint64_t pde, pt;

                if (bo == NULL)
                        continue;

                pt = amdgpu_bo_gpu_offset(bo);
                if (vm->page_tables[pt_idx].addr == pt)
                        continue;
                vm->page_tables[pt_idx].addr = pt;

                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
                    ((last_pt + incr * count) != pt)) {

                        if (count) {
                                amdgpu_vm_update_pages(adev, NULL, 0, ib,
                                                       last_pde, last_pt,
                                                       count, incr,
                                                       AMDGPU_PTE_VALID);
                        }

                        count = 1;
                        last_pde = pde;
                        last_pt = pt;
                } else {
                        ++count;
                }
        }

        if (count)
                amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
                                       count, incr, AMDGPU_PTE_VALID);

        if (ib->length_dw != 0) {
                amdgpu_ring_pad_ib(ring, ib);
                amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
                                 AMDGPU_FENCE_OWNER_VM);
                WARN_ON(ib->length_dw > ndw);
                r = amdgpu_job_submit(job, ring, &vm->entity,
                                      AMDGPU_FENCE_OWNER_VM, &fence);
                if (r)
                        goto error_free;

                amdgpu_bo_fence(pd, fence, true);
                fence_put(vm->page_directory_fence);
                vm->page_directory_fence = fence_get(fence);
                fence_put(fence);

        } else {
                amdgpu_job_free(job);
        }

        return 0;

error_free:
        amdgpu_job_free(job);
        return r;
}

/**
 * amdgpu_vm_frag_ptes - add fragment information to PTEs
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw mapping flags
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 */
static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
                                struct amdgpu_gart *gtt,
                                uint32_t gtt_flags,
                                struct amdgpu_ib *ib,
                                uint64_t pe_start, uint64_t pe_end,
                                uint64_t addr, uint32_t flags)
{
        /**
         * The MC L1 TLB supports variable sized pages, based on a fragment
         * field in the PTE. When this field is set to a non-zero value, page
         * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
         * flags are considered valid for all PTEs within the fragment range
         * and corresponding mappings are assumed to be physically contiguous.
         *
         * The L1 TLB can store a single PTE for the whole fragment,
         * significantly increasing the space available for translation
         * caching. This leads to large improvements in throughput when the
         * TLB is under pressure.
         *
         * The L2 TLB distributes small and large fragments into two
         * asymmetric partitions.
         * The large fragment cache is significantly larger. Thus, we try to
         * use large fragments wherever possible.
         * Userspace can support this by aligning virtual base address and
         * allocation size to the fragment size.
         */

        /* SI and newer are optimized for 64KB */
        uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB;
        uint64_t frag_align = 0x80;

        uint64_t frag_start = ALIGN(pe_start, frag_align);
        uint64_t frag_end = pe_end & ~(frag_align - 1);

        unsigned count;

        /* Abort early if there isn't anything to do */
        if (pe_start == pe_end)
                return;

        /* system pages are non-contiguous */
        if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {

                count = (pe_end - pe_start) / 8;
                amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
                                       addr, count, AMDGPU_GPU_PAGE_SIZE,
                                       flags);
                return;
        }

        /* handle the 4K area at the beginning */
        if (pe_start != frag_start) {
                count = (frag_start - pe_start) / 8;
                amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
                                       count, AMDGPU_GPU_PAGE_SIZE, flags);
                addr += AMDGPU_GPU_PAGE_SIZE * count;
        }

        /* handle the area in the middle */
        count = (frag_end - frag_start) / 8;
        amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
                               AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);

        /* handle the 4K area at the end */
        if (frag_end != pe_end) {
                addr += AMDGPU_GPU_PAGE_SIZE * count;
                count = (pe_end - frag_end) / 8;
                amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
                                       count, AMDGPU_GPU_PAGE_SIZE, flags);
        }
}
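/*
 * Illustrative example of the split performed above (the numbers are made
 * up): for pe_start = 0x1740 and pe_end = 0x1f40 with frag_align = 0x80,
 * frag_start = 0x1780 and frag_end = 0x1f00. The unaligned head
 * (0x1740-0x1780) and tail (0x1f00-0x1f40) are written as plain 4KB PTEs,
 * while the aligned middle (0x1780-0x1f00) gets flags | AMDGPU_PTE_FRAG_64KB.
 * 0x80 bytes of PTEs are 16 entries, i.e. one 64KB fragment of address space.
 */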
/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: GTT hw mapping flags
 * @vm: requested vm
 * @ib: indirect buffer to fill with commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 */
static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
                                  struct amdgpu_gart *gtt,
                                  uint32_t gtt_flags,
                                  struct amdgpu_vm *vm,
                                  struct amdgpu_ib *ib,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
{
        const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;

        uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
        uint64_t addr;

        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> amdgpu_vm_block_size;
                struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
                unsigned nptes;
                uint64_t pe_start;

                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                else
                        nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);

                pe_start = amdgpu_bo_gpu_offset(pt);
                pe_start += (addr & mask) * 8;

                if (last_pe_end != pe_start) {

                        amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
                                            last_pe_start, last_pe_end,
                                            last_dst, flags);

                        last_pe_start = pe_start;
                        last_pe_end = pe_start + 8 * nptes;
                        last_dst = dst;
                } else {
                        last_pe_end += 8 * nptes;
                }

                addr += nptes;
                dst += nptes * AMDGPU_GPU_PAGE_SIZE;
        }

        amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
                            last_pe_start, last_pe_end,
                            last_dst, flags);
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: flags as they are used for GTT
 * @vm: requested vm
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                                       struct amdgpu_gart *gtt,
                                       uint32_t gtt_flags,
                                       struct amdgpu_vm *vm,
                                       uint64_t start, uint64_t last,
                                       uint32_t flags, uint64_t addr,
                                       struct fence **fence)
{
        struct amdgpu_ring *ring;
        void *owner = AMDGPU_FENCE_OWNER_VM;
        unsigned nptes, ncmds, ndw;
        struct amdgpu_job *job;
        struct amdgpu_ib *ib;
        struct fence *f = NULL;
        int r;

        ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

        /* sync to everything on unmapping */
        if (!(flags & AMDGPU_PTE_VALID))
                owner = AMDGPU_FENCE_OWNER_UNDEFINED;

        nptes = last - start + 1;

        /*
         * reserve space for one command every (1 << BLOCK_SIZE)
         * entries or 2k dwords (whatever is smaller)
         */
        ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;

        /* padding, etc. */
        ndw = 64;

        if ((gtt == &adev->gart) && (flags == gtt_flags)) {
                /* only copy commands needed */
                ndw += ncmds * 7;

        } else if (gtt) {
                /* header for write data commands */
                ndw += ncmds * 4;

                /* body of write data command */
                ndw += nptes * 2;

        } else {
                /* set page commands needed */
                ndw += ncmds * 10;

                /* two extra commands for begin/end of fragment */
                ndw += 2 * 10;
        }

        r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
        if (r)
                return r;

        ib = &job->ibs[0];

        r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
                             owner);
        if (r)
                goto error_free;

        r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
        if (r)
                goto error_free;

        amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
                              addr, flags);

        amdgpu_ring_pad_ib(ring, ib);
        WARN_ON(ib->length_dw > ndw);
        r = amdgpu_job_submit(job, ring, &vm->entity,
                              AMDGPU_FENCE_OWNER_VM, &f);
        if (r)
                goto error_free;

        amdgpu_bo_fence(vm->page_directory, f, true);
        if (fence) {
                fence_put(*fence);
                *fence = fence_get(f);
        }
        fence_put(f);
        return 0;

error_free:
        amdgpu_job_free(job);
        return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @gtt: GART instance to use for mapping
 * @gtt_flags: flags as they are used for GTT
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @addr: addr to set the area to
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a SDMA IB.
 * Returns 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
                                      struct amdgpu_gart *gtt,
                                      uint32_t gtt_flags,
                                      struct amdgpu_vm *vm,
                                      struct amdgpu_bo_va_mapping *mapping,
                                      uint64_t addr, struct fence **fence)
{
        const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;

        uint64_t start = mapping->it.start;
        uint32_t flags = gtt_flags;
        int r;

        /* normally, bo_va->flags only contains the READABLE and WRITEABLE
         * bits, but just in case we filter the flags here as well
         */
        if (!(mapping->flags & AMDGPU_PTE_READABLE))
                flags &= ~AMDGPU_PTE_READABLE;
        if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
                flags &= ~AMDGPU_PTE_WRITEABLE;

        trace_amdgpu_vm_bo_update(mapping);

        addr += mapping->offset;

        if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
                return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
                                                   start, mapping->it.last,
                                                   flags, addr, fence);

        while (start != mapping->it.last + 1) {
                uint64_t last;

                last = min((uint64_t)mapping->it.last, start + max_size - 1);
                r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
                                                start, last, flags, addr,
                                                fence);
                if (r)
                        return r;

                start = last + 1;
                addr += max_size * AMDGPU_GPU_PAGE_SIZE;
        }

        return 0;
}
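/*
 * Illustrative note: max_size above is 64MB / AMDGPU_GPU_PAGE_SIZE = 16384
 * page table entries per submission, so a hypothetical 200MB system memory
 * mapping would be split into four updates of at most 64MB each.
 */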
/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and the mutex must be locked!
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
                        struct amdgpu_bo_va *bo_va,
                        struct ttm_mem_reg *mem)
{
        struct amdgpu_vm *vm = bo_va->vm;
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_gart *gtt = NULL;
        uint32_t flags;
        uint64_t addr;
        int r;

        if (mem) {
                addr = (u64)mem->start << PAGE_SHIFT;
                switch (mem->mem_type) {
                case TTM_PL_TT:
                        gtt = &bo_va->bo->adev->gart;
                        break;

                case TTM_PL_VRAM:
                        addr += adev->vm_manager.vram_base_offset;
                        break;

                default:
                        break;
                }
        } else {
                addr = 0;
        }

        flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);

        spin_lock(&vm->status_lock);
        if (!list_empty(&bo_va->vm_status))
                list_splice_init(&bo_va->valids, &bo_va->invalids);
        spin_unlock(&vm->status_lock);

        list_for_each_entry(mapping, &bo_va->invalids, list) {
                r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
                                               &bo_va->last_pt_update);
                if (r)
                        return r;
        }

        if (trace_amdgpu_vm_bo_mapping_enabled()) {
                list_for_each_entry(mapping, &bo_va->valids, list)
                        trace_amdgpu_vm_bo_mapping(mapping);

                list_for_each_entry(mapping, &bo_va->invalids, list)
                        trace_amdgpu_vm_bo_mapping(mapping);
        }

        spin_lock(&vm->status_lock);
        list_splice_init(&bo_va->invalids, &bo_va->valids);
        list_del_init(&bo_va->vm_status);
        if (!mem)
                list_add(&bo_va->vm_status, &vm->cleared);
        spin_unlock(&vm->status_lock);

        return 0;
}

/**
 * amdgpu_vm_clear_freed - clear freed BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                          struct amdgpu_vm *vm)
{
        struct amdgpu_bo_va_mapping *mapping;
        int r;

        while (!list_empty(&vm->freed)) {
                mapping = list_first_entry(&vm->freed,
                        struct amdgpu_bo_va_mapping, list);
                list_del(&mapping->list);

                r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
                                               0, NULL);
                kfree(mapping);
                if (r)
                        return r;

        }
        return 0;
}

/**
 * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @sync: sync object to add the last PT update fence to
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
                             struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
        struct amdgpu_bo_va *bo_va = NULL;
        int r = 0;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->invalidated)) {
                bo_va = list_first_entry(&vm->invalidated,
                        struct amdgpu_bo_va, vm_status);
                spin_unlock(&vm->status_lock);

                r = amdgpu_vm_bo_update(adev, bo_va, NULL);
                if (r)
                        return r;

                spin_lock(&vm->status_lock);
        }
        spin_unlock(&vm->status_lock);

        if (bo_va)
                r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);

        return r;
}
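/*
 * Overview of the list bookkeeping used above and below (see the individual
 * functions for the exact details): amdgpu_vm_bo_invalidate() puts a bo_va
 * on vm->invalidated, amdgpu_vm_clear_invalids() then calls
 * amdgpu_vm_bo_update() for each entry, which moves the freshly written
 * mappings from bo_va->invalids to bo_va->valids and, when no memory backs
 * the BO, parks the bo_va on vm->cleared. Mappings removed while still
 * valid are queued on vm->freed and their PTEs are cleared later by
 * amdgpu_vm_clear_freed().
 */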
/**
 * amdgpu_vm_bo_add - add a bo to a specific vm
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @bo: amdgpu buffer object
 *
 * Add @bo into the requested vm.
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
                                      struct amdgpu_vm *vm,
                                      struct amdgpu_bo *bo)
{
        struct amdgpu_bo_va *bo_va;

        bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
        if (bo_va == NULL) {
                return NULL;
        }
        bo_va->vm = vm;
        bo_va->bo = bo;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
        INIT_LIST_HEAD(&bo_va->valids);
        INIT_LIST_HEAD(&bo_va->invalids);
        INIT_LIST_HEAD(&bo_va->vm_status);

        list_add_tail(&bo_va->bo_list, &bo->va);

        return bo_va;
}

/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                     struct amdgpu_bo_va *bo_va,
                     uint64_t saddr, uint64_t offset,
                     uint64_t size, uint32_t flags)
{
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_vm *vm = bo_va->vm;
        struct interval_tree_node *it;
        unsigned last_pfn, pt_idx;
        uint64_t eaddr;
        int r;

        /* validate the parameters */
        if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
            size == 0 || size & AMDGPU_GPU_PAGE_MASK)
                return -EINVAL;

        /* make sure object fits at this offset */
        eaddr = saddr + size - 1;
        if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
                return -EINVAL;

        last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
        if (last_pfn >= adev->vm_manager.max_pfn) {
                dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
                        last_pfn, adev->vm_manager.max_pfn);
                return -EINVAL;
        }

        saddr /= AMDGPU_GPU_PAGE_SIZE;
        eaddr /= AMDGPU_GPU_PAGE_SIZE;

        it = interval_tree_iter_first(&vm->va, saddr, eaddr);
        if (it) {
                struct amdgpu_bo_va_mapping *tmp;
                tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
                /* bo and tmp overlap, invalid addr */
                dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
                        "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
                        tmp->it.start, tmp->it.last + 1);
                r = -EINVAL;
                goto error;
        }

        mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
        if (!mapping) {
                r = -ENOMEM;
                goto error;
        }

        INIT_LIST_HEAD(&mapping->list);
        mapping->it.start = saddr;
        mapping->it.last = eaddr;
        mapping->offset = offset;
        mapping->flags = flags;

        list_add(&mapping->list, &bo_va->invalids);
        interval_tree_insert(&mapping->it, &vm->va);

        /* Make sure the page tables are allocated */
        saddr >>= amdgpu_vm_block_size;
        eaddr >>= amdgpu_vm_block_size;

        BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev));

        if (eaddr > vm->max_pde_used)
                vm->max_pde_used = eaddr;

        /* walk over the address space and allocate the page tables */
        for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
                struct reservation_object *resv = vm->page_directory->tbo.resv;
                struct amdgpu_bo_list_entry *entry;
                struct amdgpu_bo *pt;

                entry = &vm->page_tables[pt_idx].entry;
                if (entry->robj)
                        continue;

                r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
                                     AMDGPU_GPU_PAGE_SIZE, true,
                                     AMDGPU_GEM_DOMAIN_VRAM,
                                     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
                                     NULL, resv, &pt);
                if (r)
                        goto error_free;

                /* Keep a reference to the page table to avoid freeing
                 * them up in the wrong order.
                 */
                pt->parent = amdgpu_bo_ref(vm->page_directory);

                r = amdgpu_vm_clear_bo(adev, vm, pt);
                if (r) {
                        amdgpu_bo_unref(&pt);
                        goto error_free;
                }

                entry->robj = pt;
                entry->priority = 0;
                entry->tv.bo = &entry->robj->tbo;
                entry->tv.shared = true;
                entry->user_pages = NULL;
                vm->page_tables[pt_idx].addr = 0;
        }

        return 0;

error_free:
        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
        trace_amdgpu_vm_bo_unmap(bo_va, mapping);
        kfree(mapping);

error:
        return r;
}

/**
 * amdgpu_vm_bo_unmap - remove bo mapping from vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to remove the address from
 * @saddr: where the BO is mapped
 *
 * Remove a mapping of the BO at the specified addr from the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
 */
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
                       struct amdgpu_bo_va *bo_va,
                       uint64_t saddr)
{
        struct amdgpu_bo_va_mapping *mapping;
        struct amdgpu_vm *vm = bo_va->vm;
        bool valid = true;

        saddr /= AMDGPU_GPU_PAGE_SIZE;

        list_for_each_entry(mapping, &bo_va->valids, list) {
                if (mapping->it.start == saddr)
                        break;
        }

        if (&mapping->list == &bo_va->valids) {
                valid = false;

                list_for_each_entry(mapping, &bo_va->invalids, list) {
                        if (mapping->it.start == saddr)
                                break;
                }

                if (&mapping->list == &bo_va->invalids)
                        return -ENOENT;
        }

        list_del(&mapping->list);
        interval_tree_remove(&mapping->it, &vm->va);
        trace_amdgpu_vm_bo_unmap(bo_va, mapping);

        if (valid)
                list_add(&mapping->list, &vm->freed);
        else
                kfree(mapping);

        return 0;
}

/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
                      struct amdgpu_bo_va *bo_va)
{
        struct amdgpu_bo_va_mapping *mapping, *next;
        struct amdgpu_vm *vm = bo_va->vm;

        list_del(&bo_va->bo_list);

        spin_lock(&vm->status_lock);
        list_del(&bo_va->vm_status);
        spin_unlock(&vm->status_lock);

        list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
                list_del(&mapping->list);
                interval_tree_remove(&mapping->it, &vm->va);
                trace_amdgpu_vm_bo_unmap(bo_va, mapping);
                list_add(&mapping->list, &vm->freed);
        }
        list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
                list_del(&mapping->list);
                interval_tree_remove(&mapping->it, &vm->va);
                kfree(mapping);
        }

        fence_put(bo_va->last_pt_update);
        kfree(bo_va);
}

/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 *
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
                             struct amdgpu_bo *bo)
{
        struct amdgpu_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                spin_lock(&bo_va->vm->status_lock);
                if (list_empty(&bo_va->vm_status))
                        list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
                spin_unlock(&bo_va->vm->status_lock);
        }
}

/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Init @vm fields.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
        const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
                AMDGPU_VM_PTE_COUNT * 8);
        unsigned pd_size, pd_entries;
        unsigned ring_instance;
        struct amdgpu_ring *ring;
        struct amd_sched_rq *rq;
        int i, r;

        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                vm->ids[i].mgr_id = NULL;
                vm->ids[i].flushed_updates = NULL;
        }
        vm->va = RB_ROOT;
        spin_lock_init(&vm->status_lock);
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->cleared);
        INIT_LIST_HEAD(&vm->freed);

        pd_size = amdgpu_vm_directory_size(adev);
        pd_entries = amdgpu_vm_num_pdes(adev);

        /* allocate page table array */
        vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
        }

        /* create scheduler entity for page table updates */
        ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
        ring_instance %= adev->vm_manager.vm_pte_num_rings;
        ring = adev->vm_manager.vm_pte_rings[ring_instance];
        rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
        r = amd_sched_entity_init(&ring->sched, &vm->entity,
                                  rq, amdgpu_sched_jobs);
        if (r)
                return r;

        vm->page_directory_fence = NULL;

        r = amdgpu_bo_create(adev, pd_size, align, true,
                             AMDGPU_GEM_DOMAIN_VRAM,
                             AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
                             NULL, NULL, &vm->page_directory);
        if (r)
                goto error_free_sched_entity;

        r = amdgpu_bo_reserve(vm->page_directory, false);
        if (r)
                goto error_free_page_directory;

        r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
        amdgpu_bo_unreserve(vm->page_directory);
        if (r)
                goto error_free_page_directory;

        return 0;

error_free_page_directory:
        amdgpu_bo_unref(&vm->page_directory);
        vm->page_directory = NULL;

error_free_sched_entity:
        amd_sched_entity_fini(&ring->sched, &vm->entity);

        return r;
}

/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm.
 * Unbind the VM and remove all bos from the vm bo list.
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
        struct amdgpu_bo_va_mapping *mapping, *tmp;
        int i;

        amd_sched_entity_fini(vm->entity.sched, &vm->entity);

        if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(adev->dev, "still active bo inside vm\n");
        }
        rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
                list_del(&mapping->list);
                interval_tree_remove(&mapping->it, &vm->va);
                kfree(mapping);
        }
        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
                list_del(&mapping->list);
                kfree(mapping);
        }

        for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
                amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
        drm_free_large(vm->page_tables);

        amdgpu_bo_unref(&vm->page_directory);
        fence_put(vm->page_directory_fence);

        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                struct amdgpu_vm_id *id = &vm->ids[i];

                if (id->mgr_id)
                        atomic_long_cmpxchg(&id->mgr_id->owner,
                                            (long)id, 0);
                fence_put(id->flushed_updates);
        }
}

/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures.
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
        unsigned i;

        INIT_LIST_HEAD(&adev->vm_manager.ids_lru);

        /* skip over VMID 0, since it is the system VM */
        for (i = 1; i < adev->vm_manager.num_ids; ++i) {
                amdgpu_vm_reset_id(adev, i);
                list_add_tail(&adev->vm_manager.ids[i].list,
                              &adev->vm_manager.ids_lru);
        }

        atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
}

/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
        unsigned i;

        for (i = 0; i < AMDGPU_NUM_VM; ++i)
                fence_put(adev->vm_manager.ids[i].active);
}