// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#include <drm/i915_drm.h>

#include "gem/i915_gem_lmem.h"

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"
#include "gen8_ppgtt.h"

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(&i915->ggtt);
	if (ret)
		return ret;

	return 0;
}

/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
	/*
	 * Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (!intel_vtd_active())
		return false;

	if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
		return true;

	if (GRAPHICS_VER(i915) == 12)
		return true; /* XXX DMAR fault reason 7 */

	return false;
}

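/*
 * Evict all unpinned VMAs and point every GGTT PTE at scratch before the
 * power transition; bindings that remain (pinned VMAs) are rewritten by
 * i915_ggtt_resume().
 */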
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;
	int open;

	mutex_lock(&ggtt->vm.mutex);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		i915_vma_wait_for_bind(vma);

		if (i915_vma_is_pinned(vma))
			continue;

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			__i915_vma_evict(vma);
			drm_mm_remove_node(&vma->node);
		}
	}

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
	ggtt->invalidate(ggtt);
	atomic_set(&ggtt->vm.open, open);

	mutex_unlock(&ggtt->vm.mutex);

	intel_gt_check_and_clear_faults(ggtt->vm.gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (GRAPHICS_VER(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

u64 gen8_ggtt_pte_encode(dma_addr_t addr,
			 enum i915_cache_level level,
			 u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT;

	if (flags & PTE_LM)
		pte |= GEN12_GGTT_PTE_LM;

	return pte;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, flags));

	ggtt->invalidate(ggtt);
}

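/*
 * Write one PTE per backing page of @vma into the GSM, pad the rest of the
 * allocated node with the scratch PTE and then invalidate the GGTT TLBs.
 */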
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, flags);
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, level, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		iowrite32(vm->pte_encode(addr, level, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

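/*
 * On platforms where GGTT updates must not run concurrently with aperture
 * accesses (see intel_vm_no_concurrent_access_wa()), the insert paths below
 * are wrapped in stop_machine() and followed by a GAM register read so the
 * PTE writes have landed before any other CPU touches the aperture.
 */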
static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

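/*
 * Pre-gen6 path: hand the scatterlist over to the intel-gtt (GMCH) helpers,
 * which own the PTE format on those chipsets.
 */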
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static void ggtt_bind_vma(struct i915_address_space *vm,
			  struct i915_vm_pt_stash *stash,
			  struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
		return;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;
	if (i915_gem_object_is_lmem(obj))
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma, cache_level, pte_flags);
	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
{
	vm->clear_range(vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 */
		ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			ggtt->error_capture.start,
			ggtt->error_capture.start + ggtt->error_capture.size);

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma *vma,
				  enum i915_cache_level cache_level,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma, cache_level, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma, cache_level, pte_flags);
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma *vma)
{
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		vm->clear_range(vm, vma->node.start, vma->size);

	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
}

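/*
 * Set up the single aliasing PPGTT that shadows the whole GGTT range, and
 * switch the GGTT vma ops over to the aliasing bind/unbind variants above.
 */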
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	atomic_set(&ggtt->vm.open, 0);

	flush_workqueue(ggtt->vm.i915->wq);

	mutex_lock(&ggtt->vm.mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(__i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

/**
 * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
 * all free objects have been drained.
 * @i915: i915 device
 */
void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
	dma_resv_fini(&ggtt->vm._resv);
}

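/*
 * Decoders for the GGMS field of the GMCH control word into the size in
 * bytes of the GTT itself (the PTE array): gen6/7 store the size in MB
 * directly, while gen8+ and CHV store a power-of-two exponent.
 */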
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
{
	/*
	 * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
	 * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
	 */
	GEM_BUG_ON(GRAPHICS_VER(i915) < 6);
	return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M;
}

static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	return gen6_gttmmadr_size(i915) / 2;
}

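/*
 * Map the GSM (the GGTT page table living behind the GTTMMADR BAR) and set
 * up the scratch page that unused PTEs are pointed at.
 */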
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	phys_addr_t phys_addr;
	u32 pte_flags;
	int ret;

	GEM_WARN_ON(pci_resource_len(pdev, 0) != gen6_gttmmadr_size(i915));
	phys_addr = pci_resource_start(pdev, 0) + gen6_gttadr_offset(i915);

	/*
	 * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(i915) || GRAPHICS_VER(i915) >= 11)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	pte_flags = 0;
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, pte_flags);

	return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
	int ret;

	GEM_BUG_ON(vma->pages);

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
					       pci_resource_len(pdev, bar));
}

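/*
 * gen8+ probe: size the GGTT from the GMCH control word, select the PTE
 * writers (using the stop_machine() variants where concurrent access must be
 * serialized) and set up the private PAT before mapping the GSM.
 */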
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	if (!HAS_LMEM(i915)) {
		ggtt->gmadr = pci_resource(pdev, 2);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/*
	 * Serialize GTT updates with aperture access on BXT if VT-d is on,
	 * and always on CHV.
	 */
	if (intel_vm_no_concurrent_access_wa(i915)) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	setup_private_pat(ggtt->vm.gt->uncore);

	return ggtt_probe_common(ggtt, size);
}

static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

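/*
 * gen6/7 probe: size the GGTT from the GMCH control word and select the PTE
 * encoder matching the platform's cacheability bits.
 */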
static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	unsigned int size;
	u16 snb_gmch_ctl;

	ggtt->gmadr = pci_resource(pdev, 2);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (GRAPHICS_VER(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	ret = intel_gmch_probe(i915->bridge_dev, to_pci_dev(i915->drm.dev), NULL);
	if (!ret) {
		drm_err(&i915->drm, "failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	if (needs_idle_maps(i915)) {
		drm_notice(&i915->drm,
			   "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
		ggtt->do_idle_maps = true;
	}

	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		drm_notice(&i915->drm,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = i915->drm.dev;
	dma_resv_init(&ggtt->vm._resv);

	if (GRAPHICS_VER(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (GRAPHICS_VER(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret) {
		dma_resv_fini(&ggtt->vm._resv);
		return ret;
	}

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

	ggtt->invalidate = guc_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
	/* XXX Temporary pardon for error unload */
	if (ggtt->invalidate == gen8_ggtt_invalidate)
		return;

	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

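/*
 * Restore the GGTT after a power transition: clear stale GT faults, rewrite
 * the PTEs of every VMA that remained bound across suspend, then reprogram
 * the private PAT (gen8+) and the fence registers.
 */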
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma;
	bool flush = false;
	int open;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);
		vma->ops->bind_vma(&ggtt->vm, NULL, vma,
				   obj ? obj->cache_level : 0,
				   was_bound);
		if (obj) { /* only used during resume => exclusive access */
			flush |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	atomic_set(&ggtt->vm.open, open);
	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
		setup_private_pat(ggtt->vm.gt->uncore);

	intel_ggtt_restore_fences(ggtt);
}

static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int src_stride, unsigned int dst_stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		unsigned int left;

		src_idx = src_stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= src_stride;
		}

		left = (dst_stride - height) * I915_GTT_PAGE_SIZE;

		if (!left)
			continue;

		st->nents++;

		/*
		 * The DE ignores the PTEs for the padding tiles, the sg entry
		 * here is just a convenience to indicate how many padding PTEs
		 * to insert at this spot.
		 */
		sg_set_page(sg, NULL, left, 0);
		sg_dma_address(sg) = 0;
		sg_dma_len(sg) = left;
		sg = sg_next(sg);
	}

	return sg;
}

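/*
 * Build a new sg_table describing @obj's pages in the rotated (column-major)
 * order the display engine expects for a rotated GGTT view; only the DMA
 * addresses are populated, the struct pages themselves are never referenced.
 */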
static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].src_stride,
				  rot_info->plane[i].dst_stride,
				  st, sg);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rot_info->plane[0].width,
		rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj,
	    unsigned int offset, unsigned int alignment_pad,
	    unsigned int width, unsigned int height,
	    unsigned int src_stride, unsigned int dst_stride,
	    struct sg_table *st, struct scatterlist *sg)
{
	unsigned int row;

	if (!width || !height)
		return sg;

	if (alignment_pad) {
		st->nents++;

		/*
		 * The DE ignores the PTEs for the padding tiles, the sg entry
		 * here is just a convenience to indicate how many padding PTEs
		 * to insert at this spot.
		 */
		sg_set_page(sg, NULL, alignment_pad * 4096, 0);
		sg_dma_address(sg) = 0;
		sg_dma_len(sg) = alignment_pad * 4096;
		sg = sg_next(sg);
	}

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */

			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += src_stride - width;

		left = (dst_stride - width) * I915_GTT_PAGE_SIZE;

		if (!left)
			continue;

		st->nents++;

		/*
		 * The DE ignores the PTEs for the padding tiles, the sg entry
		 * here is just a convenience to indicate how many padding PTEs
		 * to insert at this spot.
		 */
		sg_set_page(sg, NULL, left, 0);
		sg_dma_address(sg) = 0;
		sg_dma_len(sg) = left;
		sg = sg_next(sg);
	}

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int gtt_offset = 0;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
		unsigned int alignment_pad = 0;

		if (rem_info->plane_alignment)
			alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;

		sg = remap_pages(obj,
				 rem_info->plane[i].offset, alignment_pad,
				 rem_info->plane[i].width, rem_info->plane[i].height,
				 rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
				 st, sg);

		gtt_offset += alignment_pad +
			      rem_info->plane[i].dst_stride * rem_info->plane[i].height;
	}

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rem_info->plane[0].width,
		rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			i915_sg_trim(st); /* Drop any unused tail entries. */

			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	default:
		GEM_BUG_ON(vma->ggtt_view.type);
		fallthrough;
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_REMAPPED:
		vma->pages =
			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;
	}

	ret = 0;
	if (IS_ERR(vma->pages)) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		drm_err(&vma->vm->i915->drm,
			"Failed to get pages for VMA view type %u (%d)!\n",
			vma->ggtt_view.type, ret);
	}
	return ret;
}