// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#include <drm/i915_drm.h>

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
				   unsigned long color,
				   u64 *start,
				   u64 *end)
{
	if (i915_node_color_differs(node, color))
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;

	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

	ggtt->vm.is_ggtt = true;

	/* Only VLV supports read-only GGTT mappings */
	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
		ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

	if (ggtt->mappable_end) {
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
	}

	intel_ggtt_init_fences(ggtt);

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
	int ret;

	/*
	 * Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	ret = ggtt_init_hw(&i915->ggtt);
	if (ret)
		return ret;

	return 0;
}

/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
	/*
	 * Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;
	int open;

	mutex_lock(&ggtt->vm.mutex);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		i915_vma_wait_for_bind(vma);

		if (i915_vma_is_pinned(vma))
			continue;

		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			__i915_vma_evict(vma);
			drm_mm_remove_node(&vma->node);
		}
	}

	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
	ggtt->invalidate(ggtt);
	atomic_set(&ggtt->vm.open, open);

	mutex_unlock(&ggtt->vm.mutex);

	intel_gt_check_and_clear_faults(ggtt->vm.gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen8_ggtt_invalidate(ggtt);

	if (INTEL_GEN(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
				enum i915_cache_level level,
				u32 flags)
{
	return addr | _PAGE_PRESENT;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));

	ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *gte;
	gen8_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	/*
	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
	 * not to allow the user to override access to a read only page.
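	 * (The gen8+ GGTT PTE has no read-only bit, see the note in
	 * ggtt_bind_vma(), so the flag is simply dropped here.)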
	 */

	gte = (gen8_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

	iowrite32(vm->pte_encode(addr, level, flags), pte);

	ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *gte;
	gen6_pte_t __iomem *end;
	struct sgt_iter iter;
	dma_addr_t addr;

	gte = (gen6_pte_t __iomem *)ggtt->gsm;
	gte += vma->node.start / I915_GTT_PAGE_SIZE;
	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma->pages)
		iowrite32(vm->pte_encode(addr, level, flags), gte++);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		iowrite32(vm->scratch[0]->encode, gte++);

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished.
	 */
	ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
	/*
	 * Make sure the internal GAM fifo has been cleared of all GTT
	 * writes before exiting stop_machine(). This guarantees that
	 * any aperture accesses waiting to start in another process
	 * cannot back up behind the GTT writes causing a hang.
	 * The register can be any arbitrary GAM register.
	 */
	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
	struct i915_address_space *vm;
	dma_addr_t addr;
	u64 offset;
	enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
	struct insert_page *arg = _arg;

	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = { vm, addr, offset, level };

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct i915_vma *vma;
	enum i915_cache_level level;
	u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;

	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);

	return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 flags)
{
	struct insert_entries arg = { vm, vma, level, flags };

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->scratch[0]->encode;
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}

static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static void ggtt_bind_vma(struct i915_address_space *vm,
			  struct i915_vm_pt_stash *stash,
			  struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
		return;

	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(obj))
		pte_flags |= PTE_READ_ONLY;

	vm->insert_entries(vm, vma, cache_level, pte_flags);
	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
{
	vm->clear_range(vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
	u64 size;
	int ret;

	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
		return 0;

	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
	size = ggtt->vm.total - GUC_GGTT_TOP;

	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
				   PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");

	return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
	ggtt_release_guc_top(ggtt);
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
	/*
	 * Let GEM manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	/*
	 * GuC requires all resources that we're sharing with it to be placed in
	 * non-WOPCM memory. If GuC is not present or not in use we still need a
	 * small bias as ring wraparound at offset 0 sometimes hangs. No idea
	 * why.
	 */
	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
			       intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

	ret = intel_vgt_balloon(ggtt);
	if (ret)
		return ret;

	mutex_init(&ggtt->error_mutex);
	if (ggtt->mappable_end) {
		/*
		 * Reserve a mappable slot for our lockless error capture.
		 *
		 * We strongly prefer taking address 0x0 in order to protect
		 * other critical buffers against accidental overwrites,
		 * as writing to address 0 is a very common mistake.
		 *
		 * Since 0 may already be in use by the system (e.g. the BIOS
		 * framebuffer), we let the reservation fail quietly and hope
		 * 0 remains reserved always.
		 *
		 * If we fail to reserve 0, and then fail to find any space
		 * for an error-capture, remain silent. We can afford not
		 * to reserve an error_capture node as we have fallback
		 * paths, and we trust that 0 will remain reserved. However,
		 * the only likely reason for failure to insert is a driver
		 * bug, which we expect to cause other failures...
		 */
		ggtt->error_capture.size = I915_GTT_PAGE_SIZE;
		ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_dbg(&ggtt->vm.i915->drm,
			"Reserved GGTT:[%llx, %llx] for use by error capture\n",
			ggtt->error_capture.start,
			ggtt->error_capture.start + ggtt->error_capture.size);

	/*
	 * The upper portion of the GuC address space has a sizeable hole
	 * (several MB) that is inaccessible by GuC. Reserve this range within
	 * GGTT as it can comfortably hold GuC/HuC firmware images.
	 */
	ret = ggtt_reserve_guc_top(ggtt);
	if (ret)
		goto err;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
		drm_dbg(&ggtt->vm.i915->drm,
			"clearing unused GTT space: [%lx, %lx]\n",
			hole_start, hole_end);
		ggtt->vm.clear_range(&ggtt->vm, hole_start,
				     hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

	return 0;

err:
	cleanup_init_ggtt(ggtt);
	return ret;
}

static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
				  struct i915_vm_pt_stash *stash,
				  struct i915_vma *vma,
				  enum i915_cache_level cache_level,
				  u32 flags)
{
	u32 pte_flags;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND)
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma, cache_level, flags);

	if (flags & I915_VMA_GLOBAL_BIND)
		vm->insert_entries(vm, vma, cache_level, pte_flags);
}

static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
				    struct i915_vma *vma)
{
	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		vm->clear_range(vm, vma->node.start, vma->size);

	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
}

static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_vm_pt_stash stash = {};
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(ggtt->vm.gt);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
		err = -ENODEV;
		goto err_ppgtt;
	}

	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
	if (err)
		goto err_stash;

	/*
	 * Note we only pre-allocate as far as the end of the global
	 * GTT. On 48b / 4-level page-tables, the difference is very,
	 * very significant! We have to preallocate as GVT/vgpu does
	 * not like the page directory disappearing.
	 */
	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);

	ggtt->alias = ppgtt;
	ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	return 0;

err_stash:
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
	return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&ggtt->alias);
	if (!ppgtt)
		return;

	i915_vm_put(&ppgtt->vm);

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
	int ret;

	ret = init_ggtt(&i915->ggtt);
	if (ret)
		return ret;

	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
		ret = init_aliasing_ppgtt(&i915->ggtt);
		if (ret)
			cleanup_init_ggtt(&i915->ggtt);
	}

	return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma, *vn;

	atomic_set(&ggtt->vm.open, 0);

	rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
	flush_workqueue(ggtt->vm.i915->wq);

	mutex_lock(&ggtt->vm.mutex);

	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
		WARN_ON(__i915_vma_unbind(vma));

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);

	ggtt_release_guc_top(ggtt);
	intel_vgt_deballoon(ggtt);

	ggtt->vm.cleanup(&ggtt->vm);

	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	fini_aliasing_ppgtt(ggtt);

	intel_ggtt_fini_fences(ggtt);
	ggtt_cleanup_hw(ggtt);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
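	/*
	 * The GGTT page-table entries mapped via ggtt->gsm below live in the
	 * upper half of BAR 0, hence the offset of half the BAR length.
	 */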
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
		ggtt->gsm = ioremap(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    I915_CACHE_NONE, 0);

	return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
	int ret;

	GEM_BUG_ON(vma->pages);

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	free_scratch(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
	return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
					       pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	if (!HAS_LMEM(i915)) {
		ggtt->gmadr = pci_resource(pdev, 2);
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
	ggtt->vm.cleanup = gen6_gmch_remove;
	ggtt->vm.insert_page = gen8_ggtt_insert_page;
	ggtt->vm.clear_range = nop_clear_range;
	if (intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen8_ggtt_clear_range;

	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
	if (intel_ggtt_update_needs_vtd_wa(i915) ||
	    IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
		ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
		ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
		ggtt->vm.bind_async_flags =
			I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
	}

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

	setup_private_pat(ggtt->vm.gt->uncore);

	return ggtt_probe_common(ggtt, size);
}

static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	struct pci_dev *pdev = i915->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	ggtt->gmadr = pci_resource(pdev, 2);
	ggtt->mappable_end = resource_size(&ggtt->gmadr);

	/*
	 * 64/512MB is the current min/max we actually know of, but this is
	 * just a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
			&ggtt->mappable_end);
		return -ENXIO;
	}

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->vm.clear_range = nop_clear_range;
	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
		ggtt->vm.clear_range = gen6_ggtt_clear_range;
	ggtt->vm.insert_page = gen6_ggtt_insert_page;
	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
	ggtt->vm.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(i915))
		ggtt->vm.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(i915))
		ggtt->vm.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(i915))
		ggtt->vm.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(i915) >= 7)
		ggtt->vm.pte_encode = ivb_pte_encode;
	else
		ggtt->vm.pte_encode = snb_pte_encode;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *i915 = ggtt->vm.i915;
	phys_addr_t gmadr_base;
	int ret;

	ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
	if (!ret) {
		drm_err(&i915->drm, "failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

	ggtt->gmadr =
		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

	ggtt->vm.alloc_pt_dma = alloc_pt_dma;

	ggtt->do_idle_maps = needs_idle_maps(i915);
	ggtt->vm.insert_page = i915_ggtt_insert_page;
	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
	ggtt->vm.clear_range = i915_ggtt_clear_range;
	ggtt->vm.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
	ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
	ggtt->vm.vma_ops.clear_pages = clear_pages;

	if (unlikely(ggtt->do_idle_maps))
		drm_notice(&i915->drm,
			   "Applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	int ret;

	ggtt->vm.gt = gt;
	ggtt->vm.i915 = i915;
	ggtt->vm.dma = &i915->drm.pdev->dev;

	if (INTEL_GEN(i915) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(i915) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

	if ((ggtt->vm.total - 1) >> 32) {
		drm_err(&i915->drm,
			"We never expected a Global GTT with more than 32bits"
			" of address space! Found %lldM!\n",
			ggtt->vm.total >> 20);
		ggtt->vm.total = 1ULL << 32;
		ggtt->mappable_end =
			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
	}

	if (ggtt->mappable_end > ggtt->vm.total) {
		drm_err(&i915->drm,
			"mappable aperture extends past end of GGTT,"
			" aperture=%pa, total=%llx\n",
			&ggtt->mappable_end, ggtt->vm.total);
		ggtt->mappable_end = ggtt->vm.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> 20);
	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
		(u64)ggtt->mappable_end >> 20);
	drm_dbg(&i915->drm, "DSM size = %lluM\n",
		(u64)resource_size(&intel_graphics_stolen_res) >> 20);

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
	int ret;

	ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
	if (ret)
		return ret;

	if (intel_vtd_active())
		drm_info(&i915->drm, "VT-d active for gfx access\n");

	return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
	GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

	ggtt->invalidate = guc_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
	/* XXX Temporary pardon for error unload */
	if (ggtt->invalidate == gen8_ggtt_invalidate)
		return;

	/* We should only be called after i915_ggtt_enable_guc() */
	GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

	ggtt->invalidate = gen8_ggtt_invalidate;

	ggtt->invalidate(ggtt);
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
	struct i915_vma *vma;
	bool flush = false;
	int open;

	intel_gt_check_and_clear_faults(ggtt->vm.gt);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

	/* Skip rewriting PTE on VMA unbind. */
	open = atomic_xchg(&ggtt->vm.open, 0);

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;
		unsigned int was_bound =
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;

		GEM_BUG_ON(!was_bound);
		vma->ops->bind_vma(&ggtt->vm, NULL, vma,
				   obj ? obj->cache_level : 0,
				   was_bound);
		if (obj) { /* only used during resume => exclusive access */
			flush |= fetch_and_zero(&obj->write_domain);
			obj->read_domains |= I915_GEM_DOMAIN_GTT;
		}
	}

	atomic_set(&ggtt->vm.open, open);
	ggtt->invalidate(ggtt);

	if (flush)
		wbinvd_on_all_cpus();

	if (INTEL_GEN(ggtt->vm.i915) >= 8)
		setup_private_pat(ggtt->vm.gt->uncore);

	intel_ggtt_restore_fences(ggtt);
}

static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column + offset;
		for (row = 0; row < height; row++) {
			st->nents++;
			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need is the DMA address.
			 */
			sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
			sg_dma_address(sg) =
				i915_gem_object_get_dma_address(obj, src_idx);
			sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_rotation_info_size(rot_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(obj, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rot_info->plane[0].width,
		rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
	    unsigned int width, unsigned int height,
	    unsigned int stride,
	    struct sg_table *st, struct scatterlist *sg)
{
	unsigned int row;

	for (row = 0; row < height; row++) {
		unsigned int left = width * I915_GTT_PAGE_SIZE;

		while (left) {
			dma_addr_t addr;
			unsigned int length;

			/*
			 * We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need is the DMA address.
			 */

			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

			length = min(left, length);

			st->nents++;

			sg_set_page(sg, NULL, length, 0);
			sg_dma_address(sg) = addr;
			sg_dma_len(sg) = length;
			sg = sg_next(sg);

			offset += length / I915_GTT_PAGE_SIZE;
			left -= length;
		}

		offset += stride - width;
	}

	return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
		  struct drm_i915_gem_object *obj)
{
	unsigned int size = intel_remapped_info_size(rem_info);
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;
	int i;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
		sg = remap_pages(obj, rem_info->plane[i].offset,
				 rem_info->plane[i].width, rem_info->plane[i].height,
				 rem_info->plane[i].stride, st, sg);
	}

	i915_sg_trim(st);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:

	drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		obj->base.size, rem_info->plane[0].width,
		rem_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			i915_sg_trim(st); /* Drop any unused tail entries. */

			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/*
	 * The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	default:
		GEM_BUG_ON(vma->ggtt_view.type);
		fallthrough;
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_REMAPPED:
		vma->pages =
			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;
	}

	ret = 0;
	if (IS_ERR(vma->pages)) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		drm_err(&vma->vm->i915->drm,
			"Failed to get pages for VMA view type %u (%d)!\n",
			vma->ggtt_view.type, ret);
	}
	return ret;
}