/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and the system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}

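/*
 * The page tables managed here form a two level structure: the page
 * directory holds radeon_vm_num_pdes() entries of 8 bytes each, and every
 * directory entry points to a page table covering
 * (1 << radeon_vm_block_size) GPU pages of RADEON_GPU_PAGE_SIZE bytes.
 * For a given GPU virtual address the directory index is therefore
 * (va_pfn >> radeon_vm_block_size) and the index into the page table is
 * (va_pfn & (RADEON_VM_PTE_COUNT - 1)), matching the shifts and masks
 * used in radeon_vm_bo_set_addr() and radeon_vm_update_ptes() below.
 */
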
/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	unsigned i, idx;

	list = drm_malloc_ab(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc));
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tv.shared = false;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tv.shared = false;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			trace_radeon_vm_grab_id(vm->id, ring);
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	/* if we can't remember our last VM flush then flush now! */
	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->it.start = 0;
	bo_va->it.last = 0;
	bo_va->flags = 0;
	bo_va->addr = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	mutex_lock(&vm->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* copy the entries directly from the GART table */
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		/* system pages and very small updates are written entry by entry */
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		/* larger contiguous updates can use the set_pages fill path */
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;
	tv.shared = false;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head, true);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;

	ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure object fit at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last) {
		if (bo_va->addr) {
			/* add a clone of the bo_va to clear the old address */
			struct radeon_bo_va *tmp;
			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
			if (!tmp) {
				mutex_unlock(&vm->mutex);
				return -ENOMEM;
			}
			tmp->it.start = bo_va->it.start;
			tmp->it.last = bo_va->it.last;
			tmp->vm = vm;
			tmp->addr = bo_va->addr;
			tmp->bo = radeon_bo_ref(bo_va->bo);
			list_add(&tmp->vm_status, &vm->freed);
		}

		interval_tree_remove(&bo_va->it, &vm->va);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
	}

	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
		if (it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset - 1;
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;
	bo_va->addr = 0;

	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0,
				     NULL, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return radeon_bo_reserve(bo_va->bo, false);
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;

	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
						    last_pt, count, incr,
						    R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
				    incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);

		radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false);
		radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */
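	/*
	 * The alignments below are expressed in bytes of page table
	 * entries: with 8 bytes per PTE and RADEON_GPU_PAGE_SIZE (4KB) of
	 * address space per entry, 0x80 bytes correspond to 16 PTEs, i.e.
	 * a 64KB fragment, and 0x200 bytes correspond to 64 PTEs, i.e. a
	 * 256KB fragment.
	 */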
	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are non-contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
	}
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to use for the update
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;

		radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false);

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va to update
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	list_del_init(&bo_va->vm_status);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
			      bo_va->it.last + 1, addr,
			      radeon_vm_page_flags(bo_va->flags));

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	mutex_lock(&vm->mutex);
	interval_tree_remove(&bo_va->it, &vm->va);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		kfree(bo_va);
	}

	mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			mutex_lock(&bo_va->vm->mutex);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			mutex_unlock(&bo_va->vm->mutex);
		}
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
				   RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	mutex_init(&vm->mutex);
	vm->va = RB_ROOT;
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     NULL, &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
		interval_tree_remove(&bo_va->it, &vm->va);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
	}

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);

	mutex_destroy(&vm->mutex);
}