// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#include <drm/i915_drm.h>

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(&i915->ggtt);
	if (ret)
		return ret;

	return 0;
}

/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
	/*
	 * Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (!intel_vtd_active())
		return false;

	if (IS_GEN(i915, 5) && IS_MOBILE(i915))
		return true;

	if (IS_GEN(i915, 12))
		return true; /* XXX DMAR fault reason 7 */

	return false;
}

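/**
 * i915_ggtt_suspend - quiesce the GGTT prior to suspend
 * @ggtt: the GGTT to prepare
 *
 * Wait for outstanding binds to complete, evict any VMA that has lost its
 * global binding, and then reset every PTE to the scratch page before the
 * device is suspended.
 */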
void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;
	int open;

	mutex_lock(&ggtt->vm.mutex);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		i915_vma_wait_for_bind(vma);

		if (i915_vma_is_pinned(vma))
			continue;

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			__i915_vma_evict(vma);
			drm_mm_remove_node(&vma->node);
		}
	}

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
	ggtt->invalidate(ggtt);
	atomic_set(&ggtt->vm.open, open);

	mutex_unlock(&ggtt->vm.mutex);

	intel_gt_check_and_clear_faults(ggtt->vm.gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

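/*
 * When the GuC shares the GGTT, its own TLB has to be invalidated as well:
 * after the usual gen8 flush we also poke GEN12_GUC_TLB_INV_CR (or GEN8_GTCR
 * on older parts). This hook is only installed by i915_ggtt_enable_guc().
 */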
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (INTEL_GEN(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
				enum i915_cache_level level,
				u32 flags)
{
	return addr | _PAGE_PRESENT;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, level, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		iowrite32(vm->pte_encode(addr, level, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

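/*
 * To serialise GGTT updates with concurrent aperture access on BXT with
 * VT-d enabled (see the comment in gen8_gmch_probe()), the PTE writes below
 * are funnelled through stop_machine(): the arguments are packed into a
 * struct, the update runs with all other CPUs parked, and bxt_vtd_ggtt_wa()
 * flushes the GAM fifo before the machine is released.
 */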
struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

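/*
 * The i915_ggtt_* helpers below are the legacy GMCH path (gen5 and earlier,
 * see i915_gmch_probe()): the GGTT is owned by the intel-gtt library, so we
 * simply translate into intel_gtt_insert_page()/intel_gtt_insert_sg_entries()
 * and friends rather than writing PTEs ourselves.
 */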
static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static void ggtt_bind_vma(struct i915_address_space *vm,
			  struct i915_vm_pt_stash *stash,
			  struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
		return;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;

	vm->insert_entries(vm, vma, cache_level, pte_flags);
	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
{
	vm->clear_range(vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 */
		ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			ggtt->error_capture.start,
			ggtt->error_capture.start + ggtt->error_capture.size);

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma *vma,
				  enum i915_cache_level cache_level,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma, cache_level, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma, cache_level, pte_flags);
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma *vma)
{
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		vm->clear_range(vm, vma->node.start, vma->size);

	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
}

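/*
 * The aliasing ppGTT shadows the whole GGTT address range with a single
 * ppGTT: its page tables are pre-allocated for the entire range below, and
 * VMAs bound with I915_VMA_LOCAL_BIND get their PTEs written there (at the
 * same offset as their GGTT node) via aliasing_gtt_bind_vma().
 */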
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	atomic_set(&ggtt->vm.open, 0);

	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
	flush_workqueue(ggtt->vm.i915->wq);

	mutex_lock(&ggtt->vm.mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(__i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

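/*
 * The helpers below decode the GGMS field of the GMCH control register into
 * the size, in bytes, of the GTT itself (the PTE array, not the address
 * space it maps). On gen6/7 the field is the size in MiB directly; on gen8+
 * it encodes 1 << n MiB. The probe code then derives the amount of address
 * space: e.g. an 8 MiB GTT holds 1M eight-byte PTEs and therefore maps 4 GiB.
 */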
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, 0);

	return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
	int ret;

	GEM_BUG_ON(vma->pages);

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
					       pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	if (!HAS_LMEM(i915)) {
		ggtt->gmadr = pci_resource(pdev, 2);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
	if (intel_ggtt_update_needs_vtd_wa(i915) ||
	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	setup_private_pat(ggtt->vm.gt->uncore);

	return ggtt_probe_common(ggtt, size);
}

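/*
 * gen6/7 GGTT PTE encoders. gen6_gmch_probe() picks one per platform:
 * iris_pte_encode for parts with eDRAM, hsw_pte_encode for Haswell,
 * byt_pte_encode for Valleyview (the only GGTT that honours PTE_READ_ONLY),
 * ivb_pte_encode for gen7 and snb_pte_encode otherwise.
 */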
static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	ggtt->gmadr = pci_resource(pdev, 2);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
	if (!ret) {
		drm_err(&i915->drm, "failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	if (needs_idle_maps(i915)) {
		drm_notice(&i915->drm,
			   "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
		ggtt->do_idle_maps = true;
	}

	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		drm_notice(&i915->drm,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = &i915->drm.pdev->dev;

	if (INTEL_GEN(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

	ggtt->invalidate = guc_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
	/* XXX Temporary pardon for error unload */
	if (ggtt->invalidate == gen8_ggtt_invalidate)
		return;

	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

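/**
 * i915_ggtt_resume - restore the GGTT after resume
 * @ggtt: the GGTT to restore
 *
 * Clear any stale faults, fill the whole range with scratch PTEs and then
 * rebind every VMA that was bound before suspend, flushing CPU caches and
 * reinstating the PAT and fence registers as required.
 */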
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma;
	bool flush = false;
	int open;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);
		vma->ops->bind_vma(&ggtt->vm, NULL, vma,
				   obj ? obj->cache_level : 0,
				   was_bound);
		if (obj) { /* only used during resume => exclusive access */
			flush |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	atomic_set(&ggtt->vm.open, open);
	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	if (INTEL_GEN(ggtt->vm.i915) >= 8)
		setup_private_pat(ggtt->vm.gt->uncore);

	intel_ggtt_restore_fences(ggtt);
}

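/*
 * Build the sg list for a rotated view of the object: for each column of
 * the view we walk the corresponding source column from the bottom row
 * upwards, emitting one page-sized entry per tile. Only the DMA addresses
 * matter here, so the struct pages are left NULL.
 */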
static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rot_info->plane[0].width,
		rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

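/*
 * Build the sg list for a remapped view: each row takes 'width' pages from
 * the object starting at 'offset', then skips ahead by (stride - width)
 * pages to reach the next row. Contiguous DMA ranges are folded into a
 * single sg entry rather than one entry per page.
 */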
static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	    unsigned int width, unsigned int height,
	    unsigned int stride,
	    struct sg_table *st, struct scatterlist *sg)
{
	unsigned int row;

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */

			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += stride - width;
	}

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
		sg = remap_pages(obj, rem_info->plane[i].offset,
				 rem_info->plane[i].width, rem_info->plane[i].height,
				 rem_info->plane[i].stride, st, sg);
	}

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rem_info->plane[0].width,
		rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

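/*
 * A partial view maps just view->partial.size pages of the object, starting
 * at view->partial.offset: we locate the first sg entry covering that offset
 * and copy out entries (trimming the first and last) until the requested
 * number of pages has been described.
 */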
static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			i915_sg_trim(st); /* Drop any unused tail entries. */

			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	default:
		GEM_BUG_ON(vma->ggtt_view.type);
		fallthrough;
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_REMAPPED:
		vma->pages =
			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;
	}

	ret = 0;
	if (IS_ERR(vma->pages)) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		drm_err(&vma->vm->i915->drm,
			"Failed to get pages for VMA view type %u (%d)!\n",
			vma->ggtt_view.type, ret);
	}
	return ret;
}