/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy GART on older ASICs, however
 * rather than there being a single global GART table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * VRAM pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)

#undef START
#undef LAST

/* Local structure.
Encapsulate some VM table update parameters to reduce 66 * the number of function parameters 67 */ 68 struct amdgpu_pte_update_params { 69 /* amdgpu device we do this update for */ 70 struct amdgpu_device *adev; 71 /* optional amdgpu_vm we do this update for */ 72 struct amdgpu_vm *vm; 73 /* address where to copy page table entries from */ 74 uint64_t src; 75 /* indirect buffer to fill with commands */ 76 struct amdgpu_ib *ib; 77 /* Function which actually does the update */ 78 void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, 79 uint64_t addr, unsigned count, uint32_t incr, 80 uint64_t flags); 81 /* The next two are used during VM update by CPU 82 * DMA addresses to use for mapping 83 * Kernel pointer of PD/PT BO that needs to be updated 84 */ 85 dma_addr_t *pages_addr; 86 void *kptr; 87 }; 88 89 /* Helper to disable partial resident texture feature from a fence callback */ 90 struct amdgpu_prt_cb { 91 struct amdgpu_device *adev; 92 struct dma_fence_cb cb; 93 }; 94 95 /** 96 * amdgpu_vm_level_shift - return the addr shift for each level 97 * 98 * @adev: amdgpu_device pointer 99 * 100 * Returns the number of bits the pfn needs to be right shifted for a level. 101 */ 102 static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, 103 unsigned level) 104 { 105 unsigned shift = 0xff; 106 107 switch (level) { 108 case AMDGPU_VM_PDB2: 109 case AMDGPU_VM_PDB1: 110 case AMDGPU_VM_PDB0: 111 shift = 9 * (AMDGPU_VM_PDB0 - level) + 112 adev->vm_manager.block_size; 113 break; 114 case AMDGPU_VM_PTB: 115 shift = 0; 116 break; 117 default: 118 dev_err(adev->dev, "the level%d isn't supported.\n", level); 119 } 120 121 return shift; 122 } 123 124 /** 125 * amdgpu_vm_num_entries - return the number of entries in a PD/PT 126 * 127 * @adev: amdgpu_device pointer 128 * 129 * Calculate the number of entries in a page directory or page table. 130 */ 131 static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, 132 unsigned level) 133 { 134 unsigned shift = amdgpu_vm_level_shift(adev, 135 adev->vm_manager.root_level); 136 137 if (level == adev->vm_manager.root_level) 138 /* For the root directory */ 139 return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; 140 else if (level != AMDGPU_VM_PTB) 141 /* Everything in between */ 142 return 512; 143 else 144 /* For the page tables on the leaves */ 145 return AMDGPU_VM_PTE_COUNT(adev); 146 } 147 148 /** 149 * amdgpu_vm_bo_size - returns the size of the BOs in bytes 150 * 151 * @adev: amdgpu_device pointer 152 * 153 * Calculate the size of the BO for a page directory or page table in bytes. 154 */ 155 static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) 156 { 157 return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); 158 } 159 160 /** 161 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list 162 * 163 * @vm: vm providing the BOs 164 * @validated: head of validation list 165 * @entry: entry to add 166 * 167 * Add the page directory to the list of BOs to 168 * validate for command submission. 
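 *
 * A minimal usage sketch (illustrative only; error handling omitted),
 * roughly how the CS and GEM VA paths build their validation lists:
 *
 *	struct amdgpu_bo_list_entry vm_pd;
 *	struct list_head list;
 *
 *	INIT_LIST_HEAD(&list);
 *	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 *	... then reserve and validate the list before command submission ...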
169 */ 170 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, 171 struct list_head *validated, 172 struct amdgpu_bo_list_entry *entry) 173 { 174 entry->robj = vm->root.base.bo; 175 entry->priority = 0; 176 entry->tv.bo = &entry->robj->tbo; 177 entry->tv.shared = true; 178 entry->user_pages = NULL; 179 list_add(&entry->tv.head, validated); 180 } 181 182 /** 183 * amdgpu_vm_validate_pt_bos - validate the page table BOs 184 * 185 * @adev: amdgpu device pointer 186 * @vm: vm providing the BOs 187 * @validate: callback to do the validation 188 * @param: parameter for the validation callback 189 * 190 * Validate the page table BOs on command submission if neccessary. 191 */ 192 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, 193 int (*validate)(void *p, struct amdgpu_bo *bo), 194 void *param) 195 { 196 struct ttm_bo_global *glob = adev->mman.bdev.glob; 197 int r; 198 199 spin_lock(&vm->status_lock); 200 while (!list_empty(&vm->evicted)) { 201 struct amdgpu_vm_bo_base *bo_base; 202 struct amdgpu_bo *bo; 203 204 bo_base = list_first_entry(&vm->evicted, 205 struct amdgpu_vm_bo_base, 206 vm_status); 207 spin_unlock(&vm->status_lock); 208 209 bo = bo_base->bo; 210 BUG_ON(!bo); 211 if (bo->parent) { 212 r = validate(param, bo); 213 if (r) 214 return r; 215 216 spin_lock(&glob->lru_lock); 217 ttm_bo_move_to_lru_tail(&bo->tbo); 218 if (bo->shadow) 219 ttm_bo_move_to_lru_tail(&bo->shadow->tbo); 220 spin_unlock(&glob->lru_lock); 221 } 222 223 if (bo->tbo.type == ttm_bo_type_kernel && 224 vm->use_cpu_for_update) { 225 r = amdgpu_bo_kmap(bo, NULL); 226 if (r) 227 return r; 228 } 229 230 spin_lock(&vm->status_lock); 231 if (bo->tbo.type != ttm_bo_type_kernel) 232 list_move(&bo_base->vm_status, &vm->moved); 233 else 234 list_move(&bo_base->vm_status, &vm->relocated); 235 } 236 spin_unlock(&vm->status_lock); 237 238 return 0; 239 } 240 241 /** 242 * amdgpu_vm_ready - check VM is ready for updates 243 * 244 * @vm: VM to check 245 * 246 * Check if all VM PDs/PTs are ready for updates 247 */ 248 bool amdgpu_vm_ready(struct amdgpu_vm *vm) 249 { 250 bool ready; 251 252 spin_lock(&vm->status_lock); 253 ready = list_empty(&vm->evicted); 254 spin_unlock(&vm->status_lock); 255 256 return ready; 257 } 258 259 /** 260 * amdgpu_vm_alloc_levels - allocate the PD/PT levels 261 * 262 * @adev: amdgpu_device pointer 263 * @vm: requested vm 264 * @saddr: start of the address range 265 * @eaddr: end of the address range 266 * 267 * Make sure the page directories and page tables are allocated 268 */ 269 static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, 270 struct amdgpu_vm *vm, 271 struct amdgpu_vm_pt *parent, 272 uint64_t saddr, uint64_t eaddr, 273 unsigned level) 274 { 275 unsigned shift = amdgpu_vm_level_shift(adev, level); 276 unsigned pt_idx, from, to; 277 int r; 278 u64 flags; 279 uint64_t init_value = 0; 280 281 if (!parent->entries) { 282 unsigned num_entries = amdgpu_vm_num_entries(adev, level); 283 284 parent->entries = kvmalloc_array(num_entries, 285 sizeof(struct amdgpu_vm_pt), 286 GFP_KERNEL | __GFP_ZERO); 287 if (!parent->entries) 288 return -ENOMEM; 289 memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); 290 } 291 292 from = saddr >> shift; 293 to = eaddr >> shift; 294 if (from >= amdgpu_vm_num_entries(adev, level) || 295 to >= amdgpu_vm_num_entries(adev, level)) 296 return -EINVAL; 297 298 ++level; 299 saddr = saddr & ((1 << shift) - 1); 300 eaddr = eaddr & ((1 << shift) - 1); 301 302 flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 303 AMDGPU_GEM_CREATE_VRAM_CLEARED; 304 
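	/* Page table BOs either have to remain CPU accessible (when the VM
	 * is updated with the CPU) or get a shadow copy in GART so the
	 * tables can be restored after a GPU reset (when updated with SDMA).
	 */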
if (vm->use_cpu_for_update) 305 flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 306 else 307 flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | 308 AMDGPU_GEM_CREATE_SHADOW); 309 310 if (vm->pte_support_ats) { 311 init_value = AMDGPU_PTE_DEFAULT_ATC; 312 if (level != AMDGPU_VM_PTB) 313 init_value |= AMDGPU_PDE_PTE; 314 315 } 316 317 /* walk over the address space and allocate the page tables */ 318 for (pt_idx = from; pt_idx <= to; ++pt_idx) { 319 struct reservation_object *resv = vm->root.base.bo->tbo.resv; 320 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 321 struct amdgpu_bo *pt; 322 323 if (!entry->base.bo) { 324 r = amdgpu_bo_create(adev, 325 amdgpu_vm_bo_size(adev, level), 326 AMDGPU_GPU_PAGE_SIZE, true, 327 AMDGPU_GEM_DOMAIN_VRAM, 328 flags, 329 NULL, resv, init_value, &pt); 330 if (r) 331 return r; 332 333 if (vm->use_cpu_for_update) { 334 r = amdgpu_bo_kmap(pt, NULL); 335 if (r) { 336 amdgpu_bo_unref(&pt); 337 return r; 338 } 339 } 340 341 /* Keep a reference to the root directory to avoid 342 * freeing them up in the wrong order. 343 */ 344 pt->parent = amdgpu_bo_ref(parent->base.bo); 345 346 entry->base.vm = vm; 347 entry->base.bo = pt; 348 list_add_tail(&entry->base.bo_list, &pt->va); 349 spin_lock(&vm->status_lock); 350 list_add(&entry->base.vm_status, &vm->relocated); 351 spin_unlock(&vm->status_lock); 352 } 353 354 if (level < AMDGPU_VM_PTB) { 355 uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; 356 uint64_t sub_eaddr = (pt_idx == to) ? eaddr : 357 ((1 << shift) - 1); 358 r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, 359 sub_eaddr, level); 360 if (r) 361 return r; 362 } 363 } 364 365 return 0; 366 } 367 368 /** 369 * amdgpu_vm_alloc_pts - Allocate page tables. 370 * 371 * @adev: amdgpu_device pointer 372 * @vm: VM to allocate page tables for 373 * @saddr: Start address which needs to be allocated 374 * @size: Size from start address we need. 375 * 376 * Make sure the page tables are allocated. 377 */ 378 int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, 379 struct amdgpu_vm *vm, 380 uint64_t saddr, uint64_t size) 381 { 382 uint64_t last_pfn; 383 uint64_t eaddr; 384 385 /* validate the parameters */ 386 if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) 387 return -EINVAL; 388 389 eaddr = saddr + size - 1; 390 last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; 391 if (last_pfn >= adev->vm_manager.max_pfn) { 392 dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", 393 last_pfn, adev->vm_manager.max_pfn); 394 return -EINVAL; 395 } 396 397 saddr /= AMDGPU_GPU_PAGE_SIZE; 398 eaddr /= AMDGPU_GPU_PAGE_SIZE; 399 400 return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 401 adev->vm_manager.root_level); 402 } 403 404 /** 405 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug 406 * 407 * @adev: amdgpu_device pointer 408 */ 409 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) 410 { 411 const struct amdgpu_ip_block *ip_block; 412 bool has_compute_vm_bug; 413 struct amdgpu_ring *ring; 414 int i; 415 416 has_compute_vm_bug = false; 417 418 ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 419 if (ip_block) { 420 /* Compute has a VM bug for GFX version < 7. 
421 Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ 422 if (ip_block->version->major <= 7) 423 has_compute_vm_bug = true; 424 else if (ip_block->version->major == 8) 425 if (adev->gfx.mec_fw_version < 673) 426 has_compute_vm_bug = true; 427 } 428 429 for (i = 0; i < adev->num_rings; i++) { 430 ring = adev->rings[i]; 431 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) 432 /* only compute rings */ 433 ring->has_compute_vm_bug = has_compute_vm_bug; 434 else 435 ring->has_compute_vm_bug = false; 436 } 437 } 438 439 bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, 440 struct amdgpu_job *job) 441 { 442 struct amdgpu_device *adev = ring->adev; 443 unsigned vmhub = ring->funcs->vmhub; 444 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 445 struct amdgpu_vmid *id; 446 bool gds_switch_needed; 447 bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; 448 449 if (job->vmid == 0) 450 return false; 451 id = &id_mgr->ids[job->vmid]; 452 gds_switch_needed = ring->funcs->emit_gds_switch && ( 453 id->gds_base != job->gds_base || 454 id->gds_size != job->gds_size || 455 id->gws_base != job->gws_base || 456 id->gws_size != job->gws_size || 457 id->oa_base != job->oa_base || 458 id->oa_size != job->oa_size); 459 460 if (amdgpu_vmid_had_gpu_reset(adev, id)) 461 return true; 462 463 return vm_flush_needed || gds_switch_needed; 464 } 465 466 static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) 467 { 468 return (adev->mc.real_vram_size == adev->mc.visible_vram_size); 469 } 470 471 /** 472 * amdgpu_vm_flush - hardware flush the vm 473 * 474 * @ring: ring to use for flush 475 * @vmid: vmid number to use 476 * @pd_addr: address of the page directory 477 * 478 * Emit a VM flush when it is necessary. 479 */ 480 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) 481 { 482 struct amdgpu_device *adev = ring->adev; 483 unsigned vmhub = ring->funcs->vmhub; 484 struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; 485 struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; 486 bool gds_switch_needed = ring->funcs->emit_gds_switch && ( 487 id->gds_base != job->gds_base || 488 id->gds_size != job->gds_size || 489 id->gws_base != job->gws_base || 490 id->gws_size != job->gws_size || 491 id->oa_base != job->oa_base || 492 id->oa_size != job->oa_size); 493 bool vm_flush_needed = job->vm_needs_flush; 494 unsigned patch_offset = 0; 495 int r; 496 497 if (amdgpu_vmid_had_gpu_reset(adev, id)) { 498 gds_switch_needed = true; 499 vm_flush_needed = true; 500 } 501 502 if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) 503 return 0; 504 505 if (ring->funcs->init_cond_exec) 506 patch_offset = amdgpu_ring_init_cond_exec(ring); 507 508 if (need_pipe_sync) 509 amdgpu_ring_emit_pipeline_sync(ring); 510 511 if (ring->funcs->emit_vm_flush && vm_flush_needed) { 512 struct dma_fence *fence; 513 514 trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); 515 amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); 516 517 r = amdgpu_fence_emit(ring, &fence); 518 if (r) 519 return r; 520 521 mutex_lock(&id_mgr->lock); 522 dma_fence_put(id->last_flush); 523 id->last_flush = fence; 524 id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); 525 mutex_unlock(&id_mgr->lock); 526 } 527 528 if (ring->funcs->emit_gds_switch && gds_switch_needed) { 529 id->gds_base = job->gds_base; 530 id->gds_size = job->gds_size; 531 id->gws_base = job->gws_base; 532 id->gws_size = job->gws_size; 533 id->oa_base = 
job->oa_base; 534 id->oa_size = job->oa_size; 535 amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, 536 job->gds_size, job->gws_base, 537 job->gws_size, job->oa_base, 538 job->oa_size); 539 } 540 541 if (ring->funcs->patch_cond_exec) 542 amdgpu_ring_patch_cond_exec(ring, patch_offset); 543 544 /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ 545 if (ring->funcs->emit_switch_buffer) { 546 amdgpu_ring_emit_switch_buffer(ring); 547 amdgpu_ring_emit_switch_buffer(ring); 548 } 549 return 0; 550 } 551 552 /** 553 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo 554 * 555 * @vm: requested vm 556 * @bo: requested buffer object 557 * 558 * Find @bo inside the requested vm. 559 * Search inside the @bos vm list for the requested vm 560 * Returns the found bo_va or NULL if none is found 561 * 562 * Object has to be reserved! 563 */ 564 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, 565 struct amdgpu_bo *bo) 566 { 567 struct amdgpu_bo_va *bo_va; 568 569 list_for_each_entry(bo_va, &bo->va, base.bo_list) { 570 if (bo_va->base.vm == vm) { 571 return bo_va; 572 } 573 } 574 return NULL; 575 } 576 577 /** 578 * amdgpu_vm_do_set_ptes - helper to call the right asic function 579 * 580 * @params: see amdgpu_pte_update_params definition 581 * @pe: addr of the page entry 582 * @addr: dst addr to write into pe 583 * @count: number of page entries to update 584 * @incr: increase next addr by incr bytes 585 * @flags: hw access flags 586 * 587 * Traces the parameters and calls the right asic functions 588 * to setup the page table using the DMA. 589 */ 590 static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, 591 uint64_t pe, uint64_t addr, 592 unsigned count, uint32_t incr, 593 uint64_t flags) 594 { 595 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); 596 597 if (count < 3) { 598 amdgpu_vm_write_pte(params->adev, params->ib, pe, 599 addr | flags, count, incr); 600 601 } else { 602 amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, 603 count, incr, flags); 604 } 605 } 606 607 /** 608 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART 609 * 610 * @params: see amdgpu_pte_update_params definition 611 * @pe: addr of the page entry 612 * @addr: dst addr to write into pe 613 * @count: number of page entries to update 614 * @incr: increase next addr by incr bytes 615 * @flags: hw access flags 616 * 617 * Traces the parameters and calls the DMA function to copy the PTEs. 618 */ 619 static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, 620 uint64_t pe, uint64_t addr, 621 unsigned count, uint32_t incr, 622 uint64_t flags) 623 { 624 uint64_t src = (params->src + (addr >> 12) * 8); 625 626 627 trace_amdgpu_vm_copy_ptes(pe, src, count); 628 629 amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); 630 } 631 632 /** 633 * amdgpu_vm_map_gart - Resolve gart mapping of addr 634 * 635 * @pages_addr: optional DMA address to use for lookup 636 * @addr: the unmapped addr 637 * 638 * Look up the physical address of the page that the pte resolves 639 * to and return the pointer for the page table entry. 
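 *
 * As an illustration (assuming CPU pages larger than the 4K GPU pages):
 * pages_addr[addr >> PAGE_SHIFT] yields the DMA address of the CPU page,
 * the low bits (addr & ~PAGE_MASK) keep the offset inside that CPU page,
 * and the final mask aligns the result down to 4K GPU page granularity.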
640 */ 641 static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) 642 { 643 uint64_t result; 644 645 /* page table offset */ 646 result = pages_addr[addr >> PAGE_SHIFT]; 647 648 /* in case cpu page size != gpu page size*/ 649 result |= addr & (~PAGE_MASK); 650 651 result &= 0xFFFFFFFFFFFFF000ULL; 652 653 return result; 654 } 655 656 /** 657 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU 658 * 659 * @params: see amdgpu_pte_update_params definition 660 * @pe: kmap addr of the page entry 661 * @addr: dst addr to write into pe 662 * @count: number of page entries to update 663 * @incr: increase next addr by incr bytes 664 * @flags: hw access flags 665 * 666 * Write count number of PT/PD entries directly. 667 */ 668 static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, 669 uint64_t pe, uint64_t addr, 670 unsigned count, uint32_t incr, 671 uint64_t flags) 672 { 673 unsigned int i; 674 uint64_t value; 675 676 trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); 677 678 for (i = 0; i < count; i++) { 679 value = params->pages_addr ? 680 amdgpu_vm_map_gart(params->pages_addr, addr) : 681 addr; 682 amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, 683 i, value, flags); 684 addr += incr; 685 } 686 } 687 688 static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, 689 void *owner) 690 { 691 struct amdgpu_sync sync; 692 int r; 693 694 amdgpu_sync_create(&sync); 695 amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); 696 r = amdgpu_sync_wait(&sync, true); 697 amdgpu_sync_free(&sync); 698 699 return r; 700 } 701 702 /* 703 * amdgpu_vm_update_pde - update a single level in the hierarchy 704 * 705 * @param: parameters for the update 706 * @vm: requested vm 707 * @parent: parent directory 708 * @entry: entry to update 709 * 710 * Makes sure the requested entry in parent is up to date. 711 */ 712 static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, 713 struct amdgpu_vm *vm, 714 struct amdgpu_vm_pt *parent, 715 struct amdgpu_vm_pt *entry) 716 { 717 struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; 718 uint64_t pd_addr, shadow_addr = 0; 719 uint64_t pde, pt, flags; 720 unsigned level; 721 722 /* Don't update huge pages here */ 723 if (entry->huge) 724 return; 725 726 if (vm->use_cpu_for_update) { 727 pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); 728 } else { 729 pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); 730 shadow = parent->base.bo->shadow; 731 if (shadow) 732 shadow_addr = amdgpu_bo_gpu_offset(shadow); 733 } 734 735 for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) 736 pbo = pbo->parent; 737 738 level += params->adev->vm_manager.root_level; 739 pt = amdgpu_bo_gpu_offset(bo); 740 flags = AMDGPU_PTE_VALID; 741 amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); 742 if (shadow) { 743 pde = shadow_addr + (entry - parent->entries) * 8; 744 params->func(params, pde, pt, 1, 0, flags); 745 } 746 747 pde = pd_addr + (entry - parent->entries) * 8; 748 params->func(params, pde, pt, 1, 0, flags); 749 } 750 751 /* 752 * amdgpu_vm_invalidate_level - mark all PD levels as invalid 753 * 754 * @parent: parent PD 755 * 756 * Mark all PD level as invalid after an error. 757 */ 758 static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, 759 struct amdgpu_vm *vm, 760 struct amdgpu_vm_pt *parent, 761 unsigned level) 762 { 763 unsigned pt_idx, num_entries; 764 765 /* 766 * Recurse into the subdirectories. 
This recursion is harmless because 767 * we only have a maximum of 5 layers. 768 */ 769 num_entries = amdgpu_vm_num_entries(adev, level); 770 for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { 771 struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; 772 773 if (!entry->base.bo) 774 continue; 775 776 spin_lock(&vm->status_lock); 777 if (list_empty(&entry->base.vm_status)) 778 list_add(&entry->base.vm_status, &vm->relocated); 779 spin_unlock(&vm->status_lock); 780 amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); 781 } 782 } 783 784 /* 785 * amdgpu_vm_update_directories - make sure that all directories are valid 786 * 787 * @adev: amdgpu_device pointer 788 * @vm: requested vm 789 * 790 * Makes sure all directories are up to date. 791 * Returns 0 for success, error for failure. 792 */ 793 int amdgpu_vm_update_directories(struct amdgpu_device *adev, 794 struct amdgpu_vm *vm) 795 { 796 struct amdgpu_pte_update_params params; 797 struct amdgpu_job *job; 798 unsigned ndw = 0; 799 int r = 0; 800 801 if (list_empty(&vm->relocated)) 802 return 0; 803 804 restart: 805 memset(¶ms, 0, sizeof(params)); 806 params.adev = adev; 807 808 if (vm->use_cpu_for_update) { 809 r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); 810 if (unlikely(r)) 811 return r; 812 813 params.func = amdgpu_vm_cpu_set_ptes; 814 } else { 815 ndw = 512 * 8; 816 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); 817 if (r) 818 return r; 819 820 params.ib = &job->ibs[0]; 821 params.func = amdgpu_vm_do_set_ptes; 822 } 823 824 spin_lock(&vm->status_lock); 825 while (!list_empty(&vm->relocated)) { 826 struct amdgpu_vm_bo_base *bo_base, *parent; 827 struct amdgpu_vm_pt *pt, *entry; 828 struct amdgpu_bo *bo; 829 830 bo_base = list_first_entry(&vm->relocated, 831 struct amdgpu_vm_bo_base, 832 vm_status); 833 list_del_init(&bo_base->vm_status); 834 spin_unlock(&vm->status_lock); 835 836 bo = bo_base->bo->parent; 837 if (!bo) { 838 spin_lock(&vm->status_lock); 839 continue; 840 } 841 842 parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, 843 bo_list); 844 pt = container_of(parent, struct amdgpu_vm_pt, base); 845 entry = container_of(bo_base, struct amdgpu_vm_pt, base); 846 847 amdgpu_vm_update_pde(¶ms, vm, pt, entry); 848 849 spin_lock(&vm->status_lock); 850 if (!vm->use_cpu_for_update && 851 (ndw - params.ib->length_dw) < 32) 852 break; 853 } 854 spin_unlock(&vm->status_lock); 855 856 if (vm->use_cpu_for_update) { 857 /* Flush HDP */ 858 mb(); 859 amdgpu_gart_flush_gpu_tlb(adev, 0); 860 } else if (params.ib->length_dw == 0) { 861 amdgpu_job_free(job); 862 } else { 863 struct amdgpu_bo *root = vm->root.base.bo; 864 struct amdgpu_ring *ring; 865 struct dma_fence *fence; 866 867 ring = container_of(vm->entity.sched, struct amdgpu_ring, 868 sched); 869 870 amdgpu_ring_pad_ib(ring, params.ib); 871 amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, 872 AMDGPU_FENCE_OWNER_VM, false); 873 if (root->shadow) 874 amdgpu_sync_resv(adev, &job->sync, 875 root->shadow->tbo.resv, 876 AMDGPU_FENCE_OWNER_VM, false); 877 878 WARN_ON(params.ib->length_dw > ndw); 879 r = amdgpu_job_submit(job, ring, &vm->entity, 880 AMDGPU_FENCE_OWNER_VM, &fence); 881 if (r) 882 goto error; 883 884 amdgpu_bo_fence(root, fence, true); 885 dma_fence_put(vm->last_update); 886 vm->last_update = fence; 887 } 888 889 if (!list_empty(&vm->relocated)) 890 goto restart; 891 892 return 0; 893 894 error: 895 amdgpu_vm_invalidate_level(adev, vm, &vm->root, 896 adev->vm_manager.root_level); 897 amdgpu_job_free(job); 898 return r; 899 } 900 901 /** 902 * 
amdgpu_vm_get_entry - find the entry for an address
 *
 * @p: see amdgpu_pte_update_params definition
 * @addr: virtual address in question
 * @entry: resulting entry or NULL
 * @parent: parent entry
 *
 * Find the vm_pt entry and its parent for the given address.
 */
void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
			 struct amdgpu_vm_pt **entry,
			 struct amdgpu_vm_pt **parent)
{
	unsigned level = p->adev->vm_manager.root_level;

	*parent = NULL;
	*entry = &p->vm->root;
	while ((*entry)->entries) {
		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);

		*parent = *entry;
		*entry = &(*entry)->entries[addr >> shift];
		addr &= (1ULL << shift) - 1;
	}

	if (level != AMDGPU_VM_PTB)
		*entry = NULL;
}

/**
 * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
 *
 * @p: see amdgpu_pte_update_params definition
 * @entry: vm_pt entry to check
 * @parent: parent entry
 * @nptes: number of PTEs updated with this operation
 * @dst: destination address where the PTEs should point to
 * @flags: access flags for the PTEs
 *
 * Check if we can update the PD with a huge page.
 */
static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
					struct amdgpu_vm_pt *entry,
					struct amdgpu_vm_pt *parent,
					unsigned nptes, uint64_t dst,
					uint64_t flags)
{
	uint64_t pd_addr, pde;

	/* In the case of a mixed PT the PDE must point to it */
	if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
	    nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
		/* Set the huge page flag to stop scanning at this PDE */
		flags |= AMDGPU_PDE_PTE;
	}

	if (!(flags & AMDGPU_PDE_PTE)) {
		if (entry->huge) {
			/* Add the entry to the relocated list to update it. */
			entry->huge = false;
			spin_lock(&p->vm->status_lock);
			list_move(&entry->base.vm_status, &p->vm->relocated);
			spin_unlock(&p->vm->status_lock);
		}
		return;
	}

	entry->huge = true;
	amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
			       &dst, &flags);

	if (p->func == amdgpu_vm_cpu_set_ptes) {
		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
	} else {
		if (parent->base.bo->shadow) {
			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
			pde = pd_addr + (entry - parent->entries) * 8;
			p->func(p, pde, dst, 1, 0, flags);
		}
		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
	}
	pde = pd_addr + (entry - parent->entries) * 8;
	p->func(p, pde, dst, 1, 0, flags);
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @params: see amdgpu_pte_update_params definition
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 * Returns 0 for success, -EINVAL for failure.
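 *
 * Each loop iteration below stays inside a single page table: nptes is
 * capped at AMDGPU_VM_PTE_COUNT(adev) minus the offset of the current
 * address within its PT, so params->func() is invoked once per touched
 * page table (plus once more for the shadow BO, if present).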
999 */ 1000 static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, 1001 uint64_t start, uint64_t end, 1002 uint64_t dst, uint64_t flags) 1003 { 1004 struct amdgpu_device *adev = params->adev; 1005 const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; 1006 1007 uint64_t addr, pe_start; 1008 struct amdgpu_bo *pt; 1009 unsigned nptes; 1010 bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); 1011 1012 /* walk over the address space and update the page tables */ 1013 for (addr = start; addr < end; addr += nptes, 1014 dst += nptes * AMDGPU_GPU_PAGE_SIZE) { 1015 struct amdgpu_vm_pt *entry, *parent; 1016 1017 amdgpu_vm_get_entry(params, addr, &entry, &parent); 1018 if (!entry) 1019 return -ENOENT; 1020 1021 if ((addr & ~mask) == (end & ~mask)) 1022 nptes = end - addr; 1023 else 1024 nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); 1025 1026 amdgpu_vm_handle_huge_pages(params, entry, parent, 1027 nptes, dst, flags); 1028 /* We don't need to update PTEs for huge pages */ 1029 if (entry->huge) 1030 continue; 1031 1032 pt = entry->base.bo; 1033 if (use_cpu_update) { 1034 pe_start = (unsigned long)amdgpu_bo_kptr(pt); 1035 } else { 1036 if (pt->shadow) { 1037 pe_start = amdgpu_bo_gpu_offset(pt->shadow); 1038 pe_start += (addr & mask) * 8; 1039 params->func(params, pe_start, dst, nptes, 1040 AMDGPU_GPU_PAGE_SIZE, flags); 1041 } 1042 pe_start = amdgpu_bo_gpu_offset(pt); 1043 } 1044 1045 pe_start += (addr & mask) * 8; 1046 params->func(params, pe_start, dst, nptes, 1047 AMDGPU_GPU_PAGE_SIZE, flags); 1048 } 1049 1050 return 0; 1051 } 1052 1053 /* 1054 * amdgpu_vm_frag_ptes - add fragment information to PTEs 1055 * 1056 * @params: see amdgpu_pte_update_params definition 1057 * @vm: requested vm 1058 * @start: first PTE to handle 1059 * @end: last PTE to handle 1060 * @dst: addr those PTEs should point to 1061 * @flags: hw mapping flags 1062 * Returns 0 for success, -EINVAL for failure. 1063 */ 1064 static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, 1065 uint64_t start, uint64_t end, 1066 uint64_t dst, uint64_t flags) 1067 { 1068 /** 1069 * The MC L1 TLB supports variable sized pages, based on a fragment 1070 * field in the PTE. When this field is set to a non-zero value, page 1071 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE 1072 * flags are considered valid for all PTEs within the fragment range 1073 * and corresponding mappings are assumed to be physically contiguous. 1074 * 1075 * The L1 TLB can store a single PTE for the whole fragment, 1076 * significantly increasing the space available for translation 1077 * caching. This leads to large improvements in throughput when the 1078 * TLB is under pressure. 1079 * 1080 * The L2 TLB distributes small and large fragments into two 1081 * asymmetric partitions. The large fragment cache is significantly 1082 * larger. Thus, we try to use large fragments wherever possible. 1083 * Userspace can support this by aligning virtual base address and 1084 * allocation size to the fragment size. 
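	 *
	 * Worked example (illustrative numbers): for start = 0x200 and
	 * end = 0x1000 (page numbers), the code below picks
	 * frag = min(ffs(0x200) - 1, fls64(0xe00) - 1) = min(9, 11) = 9,
	 * i.e. a 2MB fragment (1 << (12 + 9) bytes), provided max_frag
	 * allows it.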
1085 */ 1086 unsigned max_frag = params->adev->vm_manager.fragment_size; 1087 int r; 1088 1089 /* system pages are non continuously */ 1090 if (params->src || !(flags & AMDGPU_PTE_VALID)) 1091 return amdgpu_vm_update_ptes(params, start, end, dst, flags); 1092 1093 while (start != end) { 1094 uint64_t frag_flags, frag_end; 1095 unsigned frag; 1096 1097 /* This intentionally wraps around if no bit is set */ 1098 frag = min((unsigned)ffs(start) - 1, 1099 (unsigned)fls64(end - start) - 1); 1100 if (frag >= max_frag) { 1101 frag_flags = AMDGPU_PTE_FRAG(max_frag); 1102 frag_end = end & ~((1ULL << max_frag) - 1); 1103 } else { 1104 frag_flags = AMDGPU_PTE_FRAG(frag); 1105 frag_end = start + (1 << frag); 1106 } 1107 1108 r = amdgpu_vm_update_ptes(params, start, frag_end, dst, 1109 flags | frag_flags); 1110 if (r) 1111 return r; 1112 1113 dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; 1114 start = frag_end; 1115 } 1116 1117 return 0; 1118 } 1119 1120 /** 1121 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table 1122 * 1123 * @adev: amdgpu_device pointer 1124 * @exclusive: fence we need to sync to 1125 * @pages_addr: DMA addresses to use for mapping 1126 * @vm: requested vm 1127 * @start: start of mapped range 1128 * @last: last mapped entry 1129 * @flags: flags for the entries 1130 * @addr: addr to set the area to 1131 * @fence: optional resulting fence 1132 * 1133 * Fill in the page table entries between @start and @last. 1134 * Returns 0 for success, -EINVAL for failure. 1135 */ 1136 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, 1137 struct dma_fence *exclusive, 1138 dma_addr_t *pages_addr, 1139 struct amdgpu_vm *vm, 1140 uint64_t start, uint64_t last, 1141 uint64_t flags, uint64_t addr, 1142 struct dma_fence **fence) 1143 { 1144 struct amdgpu_ring *ring; 1145 void *owner = AMDGPU_FENCE_OWNER_VM; 1146 unsigned nptes, ncmds, ndw; 1147 struct amdgpu_job *job; 1148 struct amdgpu_pte_update_params params; 1149 struct dma_fence *f = NULL; 1150 int r; 1151 1152 memset(¶ms, 0, sizeof(params)); 1153 params.adev = adev; 1154 params.vm = vm; 1155 1156 /* sync to everything on unmapping */ 1157 if (!(flags & AMDGPU_PTE_VALID)) 1158 owner = AMDGPU_FENCE_OWNER_UNDEFINED; 1159 1160 if (vm->use_cpu_for_update) { 1161 /* params.src is used as flag to indicate system Memory */ 1162 if (pages_addr) 1163 params.src = ~0; 1164 1165 /* Wait for PT BOs to be free. PTs share the same resv. object 1166 * as the root PD BO 1167 */ 1168 r = amdgpu_vm_wait_pd(adev, vm, owner); 1169 if (unlikely(r)) 1170 return r; 1171 1172 params.func = amdgpu_vm_cpu_set_ptes; 1173 params.pages_addr = pages_addr; 1174 return amdgpu_vm_frag_ptes(¶ms, start, last + 1, 1175 addr, flags); 1176 } 1177 1178 ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); 1179 1180 nptes = last - start + 1; 1181 1182 /* 1183 * reserve space for two commands every (1 << BLOCK_SIZE) 1184 * entries or 2k dwords (whatever is smaller) 1185 * 1186 * The second command is for the shadow pagetables. 1187 */ 1188 if (vm->root.base.bo->shadow) 1189 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; 1190 else 1191 ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); 1192 1193 /* padding, etc. 
*/ 1194 ndw = 64; 1195 1196 if (pages_addr) { 1197 /* copy commands needed */ 1198 ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; 1199 1200 /* and also PTEs */ 1201 ndw += nptes * 2; 1202 1203 params.func = amdgpu_vm_do_copy_ptes; 1204 1205 } else { 1206 /* set page commands needed */ 1207 ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; 1208 1209 /* extra commands for begin/end fragments */ 1210 ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw 1211 * adev->vm_manager.fragment_size; 1212 1213 params.func = amdgpu_vm_do_set_ptes; 1214 } 1215 1216 r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); 1217 if (r) 1218 return r; 1219 1220 params.ib = &job->ibs[0]; 1221 1222 if (pages_addr) { 1223 uint64_t *pte; 1224 unsigned i; 1225 1226 /* Put the PTEs at the end of the IB. */ 1227 i = ndw - nptes * 2; 1228 pte= (uint64_t *)&(job->ibs->ptr[i]); 1229 params.src = job->ibs->gpu_addr + i * 4; 1230 1231 for (i = 0; i < nptes; ++i) { 1232 pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * 1233 AMDGPU_GPU_PAGE_SIZE); 1234 pte[i] |= flags; 1235 } 1236 addr = 0; 1237 } 1238 1239 r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); 1240 if (r) 1241 goto error_free; 1242 1243 r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, 1244 owner, false); 1245 if (r) 1246 goto error_free; 1247 1248 r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); 1249 if (r) 1250 goto error_free; 1251 1252 r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); 1253 if (r) 1254 goto error_free; 1255 1256 amdgpu_ring_pad_ib(ring, params.ib); 1257 WARN_ON(params.ib->length_dw > ndw); 1258 r = amdgpu_job_submit(job, ring, &vm->entity, 1259 AMDGPU_FENCE_OWNER_VM, &f); 1260 if (r) 1261 goto error_free; 1262 1263 amdgpu_bo_fence(vm->root.base.bo, f, true); 1264 dma_fence_put(*fence); 1265 *fence = f; 1266 return 0; 1267 1268 error_free: 1269 amdgpu_job_free(job); 1270 return r; 1271 } 1272 1273 /** 1274 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks 1275 * 1276 * @adev: amdgpu_device pointer 1277 * @exclusive: fence we need to sync to 1278 * @pages_addr: DMA addresses to use for mapping 1279 * @vm: requested vm 1280 * @mapping: mapped range and flags to use for the update 1281 * @flags: HW flags for the mapping 1282 * @nodes: array of drm_mm_nodes with the MC addresses 1283 * @fence: optional resulting fence 1284 * 1285 * Split the mapping into smaller chunks so that each update fits 1286 * into a SDMA IB. 1287 * Returns 0 for success, -EINVAL for failure. 
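 *
 * For example, a VRAM BO scattered over several drm_mm_nodes results in
 * at least one amdgpu_vm_bo_update_mapping() call per node, and system
 * memory mappings are additionally limited to at most 16K page table
 * entries per call.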
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct dma_fence *exclusive,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t flags,
				      struct drm_mm_node *nodes,
				      struct dma_fence **fence)
{
	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
	uint64_t pfn, start = mapping->start;
	int r;

	/* Normally bo_va->flags only contains the READABLE and WRITEABLE
	 * bits here, but filter the flags in any case.
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	flags &= ~AMDGPU_PTE_EXECUTABLE;
	flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;

	flags &= ~AMDGPU_PTE_MTYPE_MASK;
	flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK);

	if ((mapping->flags & AMDGPU_PTE_PRT) &&
	    (adev->asic_type >= CHIP_VEGA10)) {
		flags |= AMDGPU_PTE_PRT;
		flags &= ~AMDGPU_PTE_VALID;
	}

	trace_amdgpu_vm_bo_update(mapping);

	pfn = mapping->offset >> PAGE_SHIFT;
	if (nodes) {
		while (pfn >= nodes->size) {
			pfn -= nodes->size;
			++nodes;
		}
	}

	do {
		dma_addr_t *dma_addr = NULL;
		uint64_t max_entries;
		uint64_t addr, last;

		if (nodes) {
			addr = nodes->start << PAGE_SHIFT;
			max_entries = (nodes->size - pfn) *
				(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
		} else {
			addr = 0;
			max_entries = S64_MAX;
		}

		if (pages_addr) {
			uint64_t count;

			max_entries = min(max_entries, 16ull * 1024ull);
			for (count = 1; count < max_entries; ++count) {
				uint64_t idx = pfn + count;

				if (pages_addr[idx] !=
				    (pages_addr[idx - 1] + PAGE_SIZE))
					break;
			}

			if (count < min_linear_pages) {
				addr = pfn << PAGE_SHIFT;
				dma_addr = pages_addr;
			} else {
				addr = pages_addr[pfn];
				max_entries = count;
			}

		} else if (flags & AMDGPU_PTE_VALID) {
			addr += adev->vm_manager.vram_base_offset;
			addr += pfn << PAGE_SHIFT;
		}

		last = min((uint64_t)mapping->last, start + max_entries - 1);
		r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm,
						start, last, flags, addr,
						fence);
		if (r)
			return r;

		pfn += last - start + 1;
		if (nodes && nodes->size == pfn) {
			pfn = 0;
			++nodes;
		}
		start = last + 1;

	} while (unlikely(start != mapping->last + 1));

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @clear: if true clear the entries
 *
 * Fill in the page table entries for @bo_va.
 * Returns 0 for success, -EINVAL for failure.
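 *
 * When @clear is true the range is written without the AMDGPU_PTE_VALID
 * bit set, so the BO's backing memory is not referenced (e.g. while the
 * BO is evicted); amdgpu_vm_handle_moved() below picks @clear depending
 * on whether the BO shares the root PD reservation or its reservation
 * object could be taken.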
1399 */ 1400 int amdgpu_vm_bo_update(struct amdgpu_device *adev, 1401 struct amdgpu_bo_va *bo_va, 1402 bool clear) 1403 { 1404 struct amdgpu_bo *bo = bo_va->base.bo; 1405 struct amdgpu_vm *vm = bo_va->base.vm; 1406 struct amdgpu_bo_va_mapping *mapping; 1407 dma_addr_t *pages_addr = NULL; 1408 struct ttm_mem_reg *mem; 1409 struct drm_mm_node *nodes; 1410 struct dma_fence *exclusive, **last_update; 1411 uint64_t flags; 1412 int r; 1413 1414 if (clear || !bo_va->base.bo) { 1415 mem = NULL; 1416 nodes = NULL; 1417 exclusive = NULL; 1418 } else { 1419 struct ttm_dma_tt *ttm; 1420 1421 mem = &bo_va->base.bo->tbo.mem; 1422 nodes = mem->mm_node; 1423 if (mem->mem_type == TTM_PL_TT) { 1424 ttm = container_of(bo_va->base.bo->tbo.ttm, 1425 struct ttm_dma_tt, ttm); 1426 pages_addr = ttm->dma_address; 1427 } 1428 exclusive = reservation_object_get_excl(bo->tbo.resv); 1429 } 1430 1431 if (bo) 1432 flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); 1433 else 1434 flags = 0x0; 1435 1436 if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) 1437 last_update = &vm->last_update; 1438 else 1439 last_update = &bo_va->last_pt_update; 1440 1441 if (!clear && bo_va->base.moved) { 1442 bo_va->base.moved = false; 1443 list_splice_init(&bo_va->valids, &bo_va->invalids); 1444 1445 } else if (bo_va->cleared != clear) { 1446 list_splice_init(&bo_va->valids, &bo_va->invalids); 1447 } 1448 1449 list_for_each_entry(mapping, &bo_va->invalids, list) { 1450 r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, 1451 mapping, flags, nodes, 1452 last_update); 1453 if (r) 1454 return r; 1455 } 1456 1457 if (vm->use_cpu_for_update) { 1458 /* Flush HDP */ 1459 mb(); 1460 amdgpu_gart_flush_gpu_tlb(adev, 0); 1461 } 1462 1463 spin_lock(&vm->status_lock); 1464 list_del_init(&bo_va->base.vm_status); 1465 spin_unlock(&vm->status_lock); 1466 1467 list_splice_init(&bo_va->invalids, &bo_va->valids); 1468 bo_va->cleared = clear; 1469 1470 if (trace_amdgpu_vm_bo_mapping_enabled()) { 1471 list_for_each_entry(mapping, &bo_va->valids, list) 1472 trace_amdgpu_vm_bo_mapping(mapping); 1473 } 1474 1475 return 0; 1476 } 1477 1478 /** 1479 * amdgpu_vm_update_prt_state - update the global PRT state 1480 */ 1481 static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) 1482 { 1483 unsigned long flags; 1484 bool enable; 1485 1486 spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); 1487 enable = !!atomic_read(&adev->vm_manager.num_prt_users); 1488 adev->gart.gart_funcs->set_prt(adev, enable); 1489 spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); 1490 } 1491 1492 /** 1493 * amdgpu_vm_prt_get - add a PRT user 1494 */ 1495 static void amdgpu_vm_prt_get(struct amdgpu_device *adev) 1496 { 1497 if (!adev->gart.gart_funcs->set_prt) 1498 return; 1499 1500 if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) 1501 amdgpu_vm_update_prt_state(adev); 1502 } 1503 1504 /** 1505 * amdgpu_vm_prt_put - drop a PRT user 1506 */ 1507 static void amdgpu_vm_prt_put(struct amdgpu_device *adev) 1508 { 1509 if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0) 1510 amdgpu_vm_update_prt_state(adev); 1511 } 1512 1513 /** 1514 * amdgpu_vm_prt_cb - callback for updating the PRT status 1515 */ 1516 static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) 1517 { 1518 struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb); 1519 1520 amdgpu_vm_prt_put(cb->adev); 1521 kfree(cb); 1522 } 1523 1524 /** 1525 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status 1526 */ 1527 static 
void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, 1528 struct dma_fence *fence) 1529 { 1530 struct amdgpu_prt_cb *cb; 1531 1532 if (!adev->gart.gart_funcs->set_prt) 1533 return; 1534 1535 cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); 1536 if (!cb) { 1537 /* Last resort when we are OOM */ 1538 if (fence) 1539 dma_fence_wait(fence, false); 1540 1541 amdgpu_vm_prt_put(adev); 1542 } else { 1543 cb->adev = adev; 1544 if (!fence || dma_fence_add_callback(fence, &cb->cb, 1545 amdgpu_vm_prt_cb)) 1546 amdgpu_vm_prt_cb(fence, &cb->cb); 1547 } 1548 } 1549 1550 /** 1551 * amdgpu_vm_free_mapping - free a mapping 1552 * 1553 * @adev: amdgpu_device pointer 1554 * @vm: requested vm 1555 * @mapping: mapping to be freed 1556 * @fence: fence of the unmap operation 1557 * 1558 * Free a mapping and make sure we decrease the PRT usage count if applicable. 1559 */ 1560 static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, 1561 struct amdgpu_vm *vm, 1562 struct amdgpu_bo_va_mapping *mapping, 1563 struct dma_fence *fence) 1564 { 1565 if (mapping->flags & AMDGPU_PTE_PRT) 1566 amdgpu_vm_add_prt_cb(adev, fence); 1567 kfree(mapping); 1568 } 1569 1570 /** 1571 * amdgpu_vm_prt_fini - finish all prt mappings 1572 * 1573 * @adev: amdgpu_device pointer 1574 * @vm: requested vm 1575 * 1576 * Register a cleanup callback to disable PRT support after VM dies. 1577 */ 1578 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1579 { 1580 struct reservation_object *resv = vm->root.base.bo->tbo.resv; 1581 struct dma_fence *excl, **shared; 1582 unsigned i, shared_count; 1583 int r; 1584 1585 r = reservation_object_get_fences_rcu(resv, &excl, 1586 &shared_count, &shared); 1587 if (r) { 1588 /* Not enough memory to grab the fence list, as last resort 1589 * block for all the fences to complete. 1590 */ 1591 reservation_object_wait_timeout_rcu(resv, true, false, 1592 MAX_SCHEDULE_TIMEOUT); 1593 return; 1594 } 1595 1596 /* Add a callback for each fence in the reservation object */ 1597 amdgpu_vm_prt_get(adev); 1598 amdgpu_vm_add_prt_cb(adev, excl); 1599 1600 for (i = 0; i < shared_count; ++i) { 1601 amdgpu_vm_prt_get(adev); 1602 amdgpu_vm_add_prt_cb(adev, shared[i]); 1603 } 1604 1605 kfree(shared); 1606 } 1607 1608 /** 1609 * amdgpu_vm_clear_freed - clear freed BOs in the PT 1610 * 1611 * @adev: amdgpu_device pointer 1612 * @vm: requested vm 1613 * @fence: optional resulting fence (unchanged if no work needed to be done 1614 * or if an error occurred) 1615 * 1616 * Make sure all freed BOs are cleared in the PT. 1617 * Returns 0 for success. 1618 * 1619 * PTs have to be reserved and mutex must be locked! 
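 *
 * The vm->freed list processed here is filled by amdgpu_vm_bo_unmap(),
 * amdgpu_vm_bo_clear_mappings() and amdgpu_vm_bo_rmv() below.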
1620 */ 1621 int amdgpu_vm_clear_freed(struct amdgpu_device *adev, 1622 struct amdgpu_vm *vm, 1623 struct dma_fence **fence) 1624 { 1625 struct amdgpu_bo_va_mapping *mapping; 1626 struct dma_fence *f = NULL; 1627 int r; 1628 uint64_t init_pte_value = 0; 1629 1630 while (!list_empty(&vm->freed)) { 1631 mapping = list_first_entry(&vm->freed, 1632 struct amdgpu_bo_va_mapping, list); 1633 list_del(&mapping->list); 1634 1635 if (vm->pte_support_ats) 1636 init_pte_value = AMDGPU_PTE_DEFAULT_ATC; 1637 1638 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, 1639 mapping->start, mapping->last, 1640 init_pte_value, 0, &f); 1641 amdgpu_vm_free_mapping(adev, vm, mapping, f); 1642 if (r) { 1643 dma_fence_put(f); 1644 return r; 1645 } 1646 } 1647 1648 if (fence && f) { 1649 dma_fence_put(*fence); 1650 *fence = f; 1651 } else { 1652 dma_fence_put(f); 1653 } 1654 1655 return 0; 1656 1657 } 1658 1659 /** 1660 * amdgpu_vm_handle_moved - handle moved BOs in the PT 1661 * 1662 * @adev: amdgpu_device pointer 1663 * @vm: requested vm 1664 * @sync: sync object to add fences to 1665 * 1666 * Make sure all BOs which are moved are updated in the PTs. 1667 * Returns 0 for success. 1668 * 1669 * PTs have to be reserved! 1670 */ 1671 int amdgpu_vm_handle_moved(struct amdgpu_device *adev, 1672 struct amdgpu_vm *vm) 1673 { 1674 bool clear; 1675 int r = 0; 1676 1677 spin_lock(&vm->status_lock); 1678 while (!list_empty(&vm->moved)) { 1679 struct amdgpu_bo_va *bo_va; 1680 struct reservation_object *resv; 1681 1682 bo_va = list_first_entry(&vm->moved, 1683 struct amdgpu_bo_va, base.vm_status); 1684 spin_unlock(&vm->status_lock); 1685 1686 resv = bo_va->base.bo->tbo.resv; 1687 1688 /* Per VM BOs never need to bo cleared in the page tables */ 1689 if (resv == vm->root.base.bo->tbo.resv) 1690 clear = false; 1691 /* Try to reserve the BO to avoid clearing its ptes */ 1692 else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) 1693 clear = false; 1694 /* Somebody else is using the BO right now */ 1695 else 1696 clear = true; 1697 1698 r = amdgpu_vm_bo_update(adev, bo_va, clear); 1699 if (r) 1700 return r; 1701 1702 if (!clear && resv != vm->root.base.bo->tbo.resv) 1703 reservation_object_unlock(resv); 1704 1705 spin_lock(&vm->status_lock); 1706 } 1707 spin_unlock(&vm->status_lock); 1708 1709 return r; 1710 } 1711 1712 /** 1713 * amdgpu_vm_bo_add - add a bo to a specific vm 1714 * 1715 * @adev: amdgpu_device pointer 1716 * @vm: requested vm 1717 * @bo: amdgpu buffer object 1718 * 1719 * Add @bo into the requested vm. 1720 * Add @bo to the list of bos associated with the vm 1721 * Returns newly added bo_va or NULL for failure 1722 * 1723 * Object has to be reserved! 
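 *
 * The returned bo_va is typically paired with one or more
 * amdgpu_vm_bo_map() calls, kept up to date with amdgpu_vm_bo_update()
 * and finally released again with amdgpu_vm_bo_rmv().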
 */
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->base.vm = vm;
	bo_va->base.bo = bo;
	INIT_LIST_HEAD(&bo_va->base.bo_list);
	INIT_LIST_HEAD(&bo_va->base.vm_status);

	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->valids);
	INIT_LIST_HEAD(&bo_va->invalids);

	if (!bo)
		return bo_va;

	list_add_tail(&bo_va->base.bo_list, &bo->va);

	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
		return bo_va;

	if (bo->preferred_domains &
	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
		return bo_va;

	/*
	 * We checked all the prerequisites, but it looks like this per VM BO
	 * is currently evicted. Add the BO to the evicted list to make sure
	 * it is validated on next VM use to avoid a fault.
	 */
	spin_lock(&vm->status_lock);
	list_move_tail(&bo_va->base.vm_status, &vm->evicted);
	spin_unlock(&vm->status_lock);

	return bo_va;
}


/**
 * amdgpu_vm_bo_insert_map - insert a new mapping
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @mapping: the mapping to insert
 *
 * Insert a new mapping into all structures.
 */
static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
				    struct amdgpu_bo_va *bo_va,
				    struct amdgpu_bo_va_mapping *mapping)
{
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo *bo = bo_va->base.bo;

	mapping->bo_va = bo_va;
	list_add(&mapping->list, &bo_va->invalids);
	amdgpu_vm_it_insert(mapping, &vm->va);

	if (mapping->flags & AMDGPU_PTE_PRT)
		amdgpu_vm_prt_get(adev);

	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
		spin_lock(&vm->status_lock);
		if (list_empty(&bo_va->base.vm_status))
			list_add(&bo_va->base.vm_status, &vm->moved);
		spin_unlock(&vm->status_lock);
	}
	trace_amdgpu_vm_bo_map(bo_va, mapping);
}

/**
 * amdgpu_vm_bo_map - map bo inside a vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: bo_va to store the address
 * @saddr: where to map the BO
 * @offset: requested offset in the BO
 * @size: size of the mapping in bytes
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Add a mapping of the BO at the specified addr into the VM.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and unreserved outside!
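 *
 * Illustrative example: mapping the first 1MB of a BO read/write at
 * VA 0x100000 (both @saddr and @size must be GPU page aligned, and the
 * range must not overlap an existing mapping or -EINVAL is returned):
 *
 *	r = amdgpu_vm_bo_map(adev, bo_va, 0x100000, 0, 0x100000,
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);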
1814 */ 1815 int amdgpu_vm_bo_map(struct amdgpu_device *adev, 1816 struct amdgpu_bo_va *bo_va, 1817 uint64_t saddr, uint64_t offset, 1818 uint64_t size, uint64_t flags) 1819 { 1820 struct amdgpu_bo_va_mapping *mapping, *tmp; 1821 struct amdgpu_bo *bo = bo_va->base.bo; 1822 struct amdgpu_vm *vm = bo_va->base.vm; 1823 uint64_t eaddr; 1824 1825 /* validate the parameters */ 1826 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || 1827 size == 0 || size & AMDGPU_GPU_PAGE_MASK) 1828 return -EINVAL; 1829 1830 /* make sure object fit at this offset */ 1831 eaddr = saddr + size - 1; 1832 if (saddr >= eaddr || 1833 (bo && offset + size > amdgpu_bo_size(bo))) 1834 return -EINVAL; 1835 1836 saddr /= AMDGPU_GPU_PAGE_SIZE; 1837 eaddr /= AMDGPU_GPU_PAGE_SIZE; 1838 1839 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 1840 if (tmp) { 1841 /* bo and tmp overlap, invalid addr */ 1842 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " 1843 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, 1844 tmp->start, tmp->last + 1); 1845 return -EINVAL; 1846 } 1847 1848 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); 1849 if (!mapping) 1850 return -ENOMEM; 1851 1852 mapping->start = saddr; 1853 mapping->last = eaddr; 1854 mapping->offset = offset; 1855 mapping->flags = flags; 1856 1857 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 1858 1859 return 0; 1860 } 1861 1862 /** 1863 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings 1864 * 1865 * @adev: amdgpu_device pointer 1866 * @bo_va: bo_va to store the address 1867 * @saddr: where to map the BO 1868 * @offset: requested offset in the BO 1869 * @flags: attributes of pages (read/write/valid/etc.) 1870 * 1871 * Add a mapping of the BO at the specefied addr into the VM. Replace existing 1872 * mappings as we do so. 1873 * Returns 0 for success, error for failure. 1874 * 1875 * Object has to be reserved and unreserved outside! 1876 */ 1877 int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, 1878 struct amdgpu_bo_va *bo_va, 1879 uint64_t saddr, uint64_t offset, 1880 uint64_t size, uint64_t flags) 1881 { 1882 struct amdgpu_bo_va_mapping *mapping; 1883 struct amdgpu_bo *bo = bo_va->base.bo; 1884 uint64_t eaddr; 1885 int r; 1886 1887 /* validate the parameters */ 1888 if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || 1889 size == 0 || size & AMDGPU_GPU_PAGE_MASK) 1890 return -EINVAL; 1891 1892 /* make sure object fit at this offset */ 1893 eaddr = saddr + size - 1; 1894 if (saddr >= eaddr || 1895 (bo && offset + size > amdgpu_bo_size(bo))) 1896 return -EINVAL; 1897 1898 /* Allocate all the needed memory */ 1899 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); 1900 if (!mapping) 1901 return -ENOMEM; 1902 1903 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); 1904 if (r) { 1905 kfree(mapping); 1906 return r; 1907 } 1908 1909 saddr /= AMDGPU_GPU_PAGE_SIZE; 1910 eaddr /= AMDGPU_GPU_PAGE_SIZE; 1911 1912 mapping->start = saddr; 1913 mapping->last = eaddr; 1914 mapping->offset = offset; 1915 mapping->flags = flags; 1916 1917 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 1918 1919 return 0; 1920 } 1921 1922 /** 1923 * amdgpu_vm_bo_unmap - remove bo mapping from vm 1924 * 1925 * @adev: amdgpu_device pointer 1926 * @bo_va: bo_va to remove the address from 1927 * @saddr: where to the BO is mapped 1928 * 1929 * Remove a mapping of the BO at the specefied addr from the VM. 1930 * Returns 0 for success, error for failure. 1931 * 1932 * Object has to be reserved and unreserved outside! 
1933 */ 1934 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 1935 struct amdgpu_bo_va *bo_va, 1936 uint64_t saddr) 1937 { 1938 struct amdgpu_bo_va_mapping *mapping; 1939 struct amdgpu_vm *vm = bo_va->base.vm; 1940 bool valid = true; 1941 1942 saddr /= AMDGPU_GPU_PAGE_SIZE; 1943 1944 list_for_each_entry(mapping, &bo_va->valids, list) { 1945 if (mapping->start == saddr) 1946 break; 1947 } 1948 1949 if (&mapping->list == &bo_va->valids) { 1950 valid = false; 1951 1952 list_for_each_entry(mapping, &bo_va->invalids, list) { 1953 if (mapping->start == saddr) 1954 break; 1955 } 1956 1957 if (&mapping->list == &bo_va->invalids) 1958 return -ENOENT; 1959 } 1960 1961 list_del(&mapping->list); 1962 amdgpu_vm_it_remove(mapping, &vm->va); 1963 mapping->bo_va = NULL; 1964 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 1965 1966 if (valid) 1967 list_add(&mapping->list, &vm->freed); 1968 else 1969 amdgpu_vm_free_mapping(adev, vm, mapping, 1970 bo_va->last_pt_update); 1971 1972 return 0; 1973 } 1974 1975 /** 1976 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range 1977 * 1978 * @adev: amdgpu_device pointer 1979 * @vm: VM structure to use 1980 * @saddr: start of the range 1981 * @size: size of the range 1982 * 1983 * Remove all mappings in a range, split them as appropriate. 1984 * Returns 0 for success, error for failure. 1985 */ 1986 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, 1987 struct amdgpu_vm *vm, 1988 uint64_t saddr, uint64_t size) 1989 { 1990 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; 1991 LIST_HEAD(removed); 1992 uint64_t eaddr; 1993 1994 eaddr = saddr + size - 1; 1995 saddr /= AMDGPU_GPU_PAGE_SIZE; 1996 eaddr /= AMDGPU_GPU_PAGE_SIZE; 1997 1998 /* Allocate all the needed memory */ 1999 before = kzalloc(sizeof(*before), GFP_KERNEL); 2000 if (!before) 2001 return -ENOMEM; 2002 INIT_LIST_HEAD(&before->list); 2003 2004 after = kzalloc(sizeof(*after), GFP_KERNEL); 2005 if (!after) { 2006 kfree(before); 2007 return -ENOMEM; 2008 } 2009 INIT_LIST_HEAD(&after->list); 2010 2011 /* Now gather all removed mappings */ 2012 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2013 while (tmp) { 2014 /* Remember mapping split at the start */ 2015 if (tmp->start < saddr) { 2016 before->start = tmp->start; 2017 before->last = saddr - 1; 2018 before->offset = tmp->offset; 2019 before->flags = tmp->flags; 2020 list_add(&before->list, &tmp->list); 2021 } 2022 2023 /* Remember mapping split at the end */ 2024 if (tmp->last > eaddr) { 2025 after->start = eaddr + 1; 2026 after->last = tmp->last; 2027 after->offset = tmp->offset; 2028 after->offset += after->start - tmp->start; 2029 after->flags = tmp->flags; 2030 list_add(&after->list, &tmp->list); 2031 } 2032 2033 list_del(&tmp->list); 2034 list_add(&tmp->list, &removed); 2035 2036 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); 2037 } 2038 2039 /* And free them up */ 2040 list_for_each_entry_safe(tmp, next, &removed, list) { 2041 amdgpu_vm_it_remove(tmp, &vm->va); 2042 list_del(&tmp->list); 2043 2044 if (tmp->start < saddr) 2045 tmp->start = saddr; 2046 if (tmp->last > eaddr) 2047 tmp->last = eaddr; 2048 2049 tmp->bo_va = NULL; 2050 list_add(&tmp->list, &vm->freed); 2051 trace_amdgpu_vm_bo_unmap(NULL, tmp); 2052 } 2053 2054 /* Insert partial mapping before the range */ 2055 if (!list_empty(&before->list)) { 2056 amdgpu_vm_it_insert(before, &vm->va); 2057 if (before->flags & AMDGPU_PTE_PRT) 2058 amdgpu_vm_prt_get(adev); 2059 } else { 2060 kfree(before); 2061 } 2062 2063 /* Insert partial mapping after the range */ 
/**
 * amdgpu_vm_bo_lookup_mapping - find mapping by address
 *
 * @vm: the requested VM
 * @addr: the requested address, in GPU pages
 *
 * Find a mapping by its address.
 */
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
							 uint64_t addr)
{
	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
}

/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm.
 *
 * Object has to be reserved!
 */
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
		      struct amdgpu_bo_va *bo_va)
{
	struct amdgpu_bo_va_mapping *mapping, *next;
	struct amdgpu_vm *vm = bo_va->base.vm;

	list_del(&bo_va->base.bo_list);

	spin_lock(&vm->status_lock);
	list_del(&bo_va->base.vm_status);
	spin_unlock(&vm->status_lock);

	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		mapping->bo_va = NULL;
		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
		list_add(&mapping->list, &vm->freed);
	}
	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		amdgpu_vm_free_mapping(adev, vm, mapping,
				       bo_va->last_pt_update);
	}

	dma_fence_put(bo_va->last_pt_update);
	kfree(bo_va);
}

/**
 * amdgpu_vm_bo_invalidate - mark the bo as invalid
 *
 * @adev: amdgpu_device pointer
 * @bo: amdgpu buffer object
 * @evicted: whether the BO is being evicted
 *
 * Mark @bo as invalid.
 */
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
			     struct amdgpu_bo *bo, bool evicted)
{
	struct amdgpu_vm_bo_base *bo_base;

	list_for_each_entry(bo_base, &bo->va, bo_list) {
		struct amdgpu_vm *vm = bo_base->vm;

		bo_base->moved = true;
		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
			spin_lock(&bo_base->vm->status_lock);
			if (bo->tbo.type == ttm_bo_type_kernel)
				list_move(&bo_base->vm_status, &vm->evicted);
			else
				list_move_tail(&bo_base->vm_status,
					       &vm->evicted);
			spin_unlock(&bo_base->vm->status_lock);
			continue;
		}

		if (bo->tbo.type == ttm_bo_type_kernel) {
			spin_lock(&bo_base->vm->status_lock);
			if (list_empty(&bo_base->vm_status))
				list_add(&bo_base->vm_status, &vm->relocated);
			spin_unlock(&bo_base->vm->status_lock);
			continue;
		}

		spin_lock(&bo_base->vm->status_lock);
		if (list_empty(&bo_base->vm_status))
			list_add(&bo_base->vm_status, &vm->moved);
		spin_unlock(&bo_base->vm->status_lock);
	}
}
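
/*
 * Illustrative sketch, not part of the driver: translating a GPU virtual
 * address back to the backing BO and the byte offset inside it with
 * amdgpu_vm_bo_lookup_mapping() above.  The helper name is hypothetical.
 * The interval tree stores addresses in GPU pages, so the byte address is
 * converted before the lookup.
 */
static __maybe_unused struct amdgpu_bo *
amdgpu_vm_addr_to_bo(struct amdgpu_vm *vm, uint64_t addr, uint64_t *offset)
{
	struct amdgpu_bo_va_mapping *mapping;
	uint64_t pfn = addr / AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, pfn);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return NULL;

	/* byte offset of the address inside the BO */
	*offset = (pfn - mapping->start) * AMDGPU_GPU_PAGE_SIZE +
		  mapping->offset + (addr & AMDGPU_GPU_PAGE_MASK);

	return mapping->bo_va->base.bo;
}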
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
	/* Total bits covered by PD + PTs */
	unsigned bits = ilog2(vm_size) + 18;

	/* Make sure the PD is 4K in size up to 8GB address space.
	 * Above that split equally between PD and PTs.
	 */
	if (vm_size <= 8)
		return (bits - 9);
	else
		return ((bits + 3) / 2);
}

/**
 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
 *
 * @adev: amdgpu_device pointer
 * @vm_size: the default VM size if it is set to auto
 * @fragment_size_default: default fragment size if not overridden
 * @max_level: maximum number of VMPT levels supported by the hardware
 * @max_bits: maximum size of the address space in bits
 */
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
			   uint32_t fragment_size_default, unsigned max_level,
			   unsigned max_bits)
{
	uint64_t tmp;

	/* adjust vm size first */
	if (amdgpu_vm_size != -1) {
		unsigned max_size = 1 << (max_bits - 30);

		vm_size = amdgpu_vm_size;
		if (vm_size > max_size) {
			dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
				 amdgpu_vm_size, max_size);
			vm_size = max_size;
		}
	}

	adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;

	tmp = roundup_pow_of_two(adev->vm_manager.max_pfn);
	if (amdgpu_vm_block_size != -1)
		tmp >>= amdgpu_vm_block_size - 9;
	tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1;
	adev->vm_manager.num_level = min(max_level, (unsigned)tmp);
	switch (adev->vm_manager.num_level) {
	case 3:
		adev->vm_manager.root_level = AMDGPU_VM_PDB2;
		break;
	case 2:
		adev->vm_manager.root_level = AMDGPU_VM_PDB1;
		break;
	case 1:
		adev->vm_manager.root_level = AMDGPU_VM_PDB0;
		break;
	default:
		dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n");
	}
	/* block size depends on vm size and hw setup */
	if (amdgpu_vm_block_size != -1)
		adev->vm_manager.block_size =
			min((unsigned)amdgpu_vm_block_size, max_bits
			    - AMDGPU_GPU_PAGE_SHIFT
			    - 9 * adev->vm_manager.num_level);
	else if (adev->vm_manager.num_level > 1)
		adev->vm_manager.block_size = 9;
	else
		adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp);

	if (amdgpu_vm_fragment_size == -1)
		adev->vm_manager.fragment_size = fragment_size_default;
	else
		adev->vm_manager.fragment_size = amdgpu_vm_fragment_size;

	DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",
		 vm_size, adev->vm_manager.num_level + 1,
		 adev->vm_manager.block_size,
		 adev->vm_manager.fragment_size);
}
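
/*
 * Worked example for amdgpu_vm_adjust_size(), illustrative numbers only:
 * vm_size = 256 GB gives max_pfn = 256 << 18 = 2^26 GPU pages, i.e.
 * bits = ilog2(256) + 18 = 26 bits of page frame number above the page
 * offset.  On a single level configuration amdgpu_vm_get_block_size()
 * selects a block size of (26 + 3) / 2 = 14, i.e. 2^14 PTEs per page table;
 * on multi level configurations the block size is simply fixed to 9 bits
 * (512 entries per level) instead.
 */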
/**
 * amdgpu_vm_init - initialize a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @vm_context: Indicates whether it is a GFX or Compute context
 * @pasid: Process address space identifier, 0 if unused
 *
 * Init @vm fields.
 */
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		   int vm_context, unsigned int pasid)
{
	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
				   AMDGPU_VM_PTE_COUNT(adev) * 8);
	uint64_t init_pde_value = 0, flags;
	unsigned ring_instance;
	struct amdgpu_ring *ring;
	struct drm_sched_rq *rq;
	unsigned long size;
	int r, i;

	vm->va = RB_ROOT_CACHED;
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		vm->reserved_vmid[i] = NULL;
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->evicted);
	INIT_LIST_HEAD(&vm->relocated);
	INIT_LIST_HEAD(&vm->moved);
	INIT_LIST_HEAD(&vm->freed);

	/* create scheduler entity for page table updates */
	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
	ring_instance %= adev->vm_manager.vm_pte_num_rings;
	ring = adev->vm_manager.vm_pte_rings[ring_instance];
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
	r = drm_sched_entity_init(&ring->sched, &vm->entity,
				  rq, amdgpu_sched_jobs, NULL);
	if (r)
		return r;

	vm->pte_support_ats = false;

	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_COMPUTE);

		if (adev->asic_type == CHIP_RAVEN) {
			vm->pte_support_ats = true;
			init_pde_value = AMDGPU_PTE_DEFAULT_ATC
					| AMDGPU_PDE_PTE;
		}
	} else
		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
					    AMDGPU_VM_USE_CPU_FOR_GFX);
	DRM_DEBUG_DRIVER("VM update mode is %s\n",
			 vm->use_cpu_for_update ? "CPU" : "SDMA");
	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
		  "CPU update of VM recommended only for large BAR system\n");
	vm->last_update = NULL;

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_VRAM_CLEARED;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			  AMDGPU_GEM_CREATE_SHADOW);

	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
	r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
			     flags, NULL, NULL, init_pde_value,
			     &vm->root.base.bo);
	if (r)
		goto error_free_sched_entity;

	r = amdgpu_bo_reserve(vm->root.base.bo, true);
	if (r)
		goto error_free_root;

	vm->root.base.vm = vm;
	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
	amdgpu_bo_unreserve(vm->root.base.bo);

	if (pasid) {
		unsigned long flags;

		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
			      GFP_ATOMIC);
		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
		if (r < 0)
			goto error_free_root;

		vm->pasid = pasid;
	}

	INIT_KFIFO(vm->faults);
	vm->fault_credit = 16;

	return 0;

error_free_root:
	amdgpu_bo_unref(&vm->root.base.bo->shadow);
	amdgpu_bo_unref(&vm->root.base.bo);
	vm->root.base.bo = NULL;

error_free_sched_entity:
	drm_sched_entity_fini(&ring->sched, &vm->entity);

	return r;
}
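
/*
 * Illustrative sketch, not part of the driver: the typical lifetime of a VM
 * as it might look from a per file-descriptor open path.  The helper name is
 * hypothetical.  A GFX context VM is created with an optional PASID, BOs are
 * then attached with amdgpu_vm_bo_add() and mapped, and the whole thing is
 * torn down again with amdgpu_vm_fini() below.
 */
static int __maybe_unused amdgpu_vm_example_open(struct amdgpu_device *adev,
						 struct amdgpu_vm *vm,
						 unsigned int pasid)
{
	int r;

	r = amdgpu_vm_init(adev, vm, AMDGPU_VM_CONTEXT_GFX, pasid);
	if (r)
		return r;

	/* ... create bo_vas with amdgpu_vm_bo_add() and map them ... */

	return 0;
}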
/**
 * amdgpu_vm_free_levels - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @parent: PD/PT starting level to free
 * @level: level of parent structure
 *
 * Free the page directory or page table level and all sub levels.
 */
static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt *parent,
				  unsigned level)
{
	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);

	if (parent->base.bo) {
		list_del(&parent->base.bo_list);
		list_del(&parent->base.vm_status);
		amdgpu_bo_unref(&parent->base.bo->shadow);
		amdgpu_bo_unref(&parent->base.bo);
	}

	if (parent->entries)
		for (i = 0; i < num_entries; i++)
			amdgpu_vm_free_levels(adev, &parent->entries[i],
					      level + 1);

	kvfree(parent->entries);
}

/**
 * amdgpu_vm_fini - tear down a vm instance
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Tear down @vm.
 * Unbind the VM and remove all bos from the vm bo list
 */
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
	struct amdgpu_bo_va_mapping *mapping, *tmp;
	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
	struct amdgpu_bo *root;
	u64 fault;
	int i, r;

	/* Clear pending page faults from IH when the VM is destroyed */
	while (kfifo_get(&vm->faults, &fault))
		amdgpu_ih_clear_fault(adev, fault);

	if (vm->pasid) {
		unsigned long flags;

		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
	}

	drm_sched_entity_fini(vm->entity.sched, &vm->entity);

	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
		dev_err(adev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(mapping, tmp,
					     &vm->va.rb_root, rb) {
		list_del(&mapping->list);
		amdgpu_vm_it_remove(mapping, &vm->va);
		kfree(mapping);
	}
	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
			amdgpu_vm_prt_fini(adev, vm);
			prt_fini_needed = false;
		}

		list_del(&mapping->list);
		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
	}

	root = amdgpu_bo_ref(vm->root.base.bo);
	r = amdgpu_bo_reserve(root, true);
	if (r) {
		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
	} else {
		amdgpu_vm_free_levels(adev, &vm->root,
				      adev->vm_manager.root_level);
		amdgpu_bo_unreserve(root);
	}
	amdgpu_bo_unref(&root);
	dma_fence_put(vm->last_update);
	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
		amdgpu_vmid_free_reserved(adev, vm, i);
}

/**
 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
 *
 * @adev: amdgpu_device pointer
 * @pasid: PASID to identify the VM
 *
 * This function is expected to be called in interrupt context. Returns
 * true if there was fault credit, false otherwise
 */
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
				  unsigned int pasid)
{
	struct amdgpu_vm *vm;

	spin_lock(&adev->vm_manager.pasid_lock);
	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
	if (!vm) {
		/* VM not found, can't track fault credit */
		spin_unlock(&adev->vm_manager.pasid_lock);
		return true;
	}

	/* No lock needed, only accessed by IRQ handler */
	if (!vm->fault_credit) {
		/* Too many faults in this VM */
		spin_unlock(&adev->vm_manager.pasid_lock);
		return false;
	}

	vm->fault_credit--;
	spin_unlock(&adev->vm_manager.pasid_lock);
	return true;
}
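
/*
 * Illustrative sketch, not part of the driver: how an interrupt handler
 * prescreen step could use the fault credit to rate limit page fault
 * processing.  The helper name is hypothetical; the pasid would come from
 * the decoded IV entry.
 */
static bool __maybe_unused amdgpu_vm_example_prescreen(struct amdgpu_device *adev,
						       unsigned int pasid)
{
	/* faults without a PASID are always processed */
	if (!pasid)
		return true;

	/* once the VM behind @pasid has used up its credit, drop the fault */
	return amdgpu_vm_pasid_fault_credit(adev, pasid);
}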
/**
 * amdgpu_vm_manager_init - init the VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures
 */
void amdgpu_vm_manager_init(struct amdgpu_device *adev)
{
	unsigned i;

	amdgpu_vmid_mgr_init(adev);

	adev->vm_manager.fence_context =
		dma_fence_context_alloc(AMDGPU_MAX_RINGS);
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		adev->vm_manager.seqno[i] = 0;

	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
	spin_lock_init(&adev->vm_manager.prt_lock);
	atomic_set(&adev->vm_manager.num_prt_users, 0);

	/* If not overridden by the user, compute VM tables are updated by the
	 * CPU only on large BAR systems.
	 */
#ifdef CONFIG_X86_64
	if (amdgpu_vm_update_mode == -1) {
		if (amdgpu_vm_is_large_bar(adev))
			adev->vm_manager.vm_update_mode =
				AMDGPU_VM_USE_CPU_FOR_COMPUTE;
		else
			adev->vm_manager.vm_update_mode = 0;
	} else
		adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode;
#else
	adev->vm_manager.vm_update_mode = 0;
#endif

	idr_init(&adev->vm_manager.pasid_idr);
	spin_lock_init(&adev->vm_manager.pasid_lock);
}

/**
 * amdgpu_vm_manager_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */
void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
{
	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
	idr_destroy(&adev->vm_manager.pasid_idr);

	amdgpu_vmid_mgr_fini(adev);
}

/**
 * amdgpu_vm_ioctl - handle the AMDGPU_VM ioctl
 *
 * @dev: drm device pointer
 * @data: ioctl arguments (union drm_amdgpu_vm)
 * @filp: drm file pointer
 *
 * Reserve or unreserve a dedicated VMID for the calling process.
 */
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	union drm_amdgpu_vm *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	int r;

	switch (args->in.op) {
	case AMDGPU_VM_OP_RESERVE_VMID:
		/* currently we only need to reserve VMIDs from the gfxhub */
		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
		if (r)
			return r;
		break;
	case AMDGPU_VM_OP_UNRESERVE_VMID:
		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
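
/*
 * Illustrative userspace sketch (not part of this file, assumes libdrm and
 * the amdgpu_drm.h uapi header): reserving a dedicated VMID for the current
 * process through the ioctl handled above.
 *
 *	union drm_amdgpu_vm args = {};
 *
 *	args.in.op = AMDGPU_VM_OP_RESERVE_VMID;
 *	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_VM, &args))
 *		perror("AMDGPU_VM_OP_RESERVE_VMID");
 */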