// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#include <drm/i915_drm.h>

#include "intel_gt.h"
#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_vgpu.h"

#include "intel_gtt.h"

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
                                   unsigned long color,
                                   u64 *start,
                                   u64 *end)
{
        if (i915_node_color_differs(node, color))
                *start += I915_GTT_PAGE_SIZE;

        /*
         * Also leave a space between the unallocated reserved node after the
         * GTT and any objects within the GTT, i.e. we use the color adjustment
         * to insert a guard page to prevent prefetches crossing over the
         * GTT boundary.
         */
        node = list_next_entry(node, node_list);
        if (node->color != color)
                *end -= I915_GTT_PAGE_SIZE;
}

static int ggtt_init_hw(struct i915_ggtt *ggtt)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;

        i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);

        ggtt->vm.is_ggtt = true;

        /* Only VLV supports read-only GGTT mappings */
        ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);

        if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
                ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;

        if (ggtt->mappable_end) {
                if (!io_mapping_init_wc(&ggtt->iomap,
                                        ggtt->gmadr.start,
                                        ggtt->mappable_end)) {
                        ggtt->vm.cleanup(&ggtt->vm);
                        return -EIO;
                }

                ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
                                              ggtt->mappable_end);
        }

        i915_ggtt_init_fences(ggtt);

        return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @i915: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
        int ret;

        stash_init(&i915->mm.wc_stash);

        /*
         * Note that we use page colouring to enforce a guard page at the
         * end of the address space. This is required as the CS may prefetch
         * beyond the end of the batch buffer, across the page boundary,
         * and beyond the end of the GTT if we do not provide a guard.
         */
        ret = ggtt_init_hw(&i915->ggtt);
        if (ret)
                return ret;

        return 0;
}

/*
 * Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *i915)
{
        /*
         * Query intel_iommu to see if we need the workaround. Presumably that
         * was loaded first.
         */
        return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
}

void i915_ggtt_suspend(struct i915_ggtt *ggtt)
{
        struct i915_vma *vma;

        list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
                i915_vma_wait_for_bind(vma);

        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
        ggtt->invalidate(ggtt);

        intel_gt_check_and_clear_faults(ggtt->vm.gt);
}

void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;

        spin_lock_irq(&uncore->lock);
        intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
        intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
        spin_unlock_irq(&uncore->lock);
}

static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
{
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;

        /*
         * Note that as an uncached mmio write, this will flush the
         * WCB of the writes into the GGTT before it triggers the invalidate.
         */
        intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
        struct intel_uncore *uncore = ggtt->vm.gt->uncore;
        struct drm_i915_private *i915 = ggtt->vm.i915;

        gen8_ggtt_invalidate(ggtt);

        if (INTEL_GEN(i915) >= 12)
                intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
                                      GEN12_GUC_TLB_INV_CR_INVALIDATE);
        else
                intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
        intel_gtt_chipset_flush();
}

static u64 gen8_ggtt_pte_encode(dma_addr_t addr,
                                enum i915_cache_level level,
                                u32 flags)
{
        return addr | _PAGE_PRESENT;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
        writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
                                  dma_addr_t addr,
                                  u64 offset,
                                  enum i915_cache_level level,
                                  u32 unused)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        gen8_pte_t __iomem *pte =
                (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

        gen8_set_pte(pte, gen8_ggtt_pte_encode(addr, level, 0));

        ggtt->invalidate(ggtt);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma *vma,
                                     enum i915_cache_level level,
                                     u32 flags)
{
        const gen8_pte_t pte_encode = gen8_ggtt_pte_encode(0, level, 0);
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        gen8_pte_t __iomem *gte;
        gen8_pte_t __iomem *end;
        struct sgt_iter iter;
        dma_addr_t addr;

        /*
         * Note that we ignore PTE_READ_ONLY here. The caller must be careful
         * not to allow the user to override access to a read only page.
         */

        gte = (gen8_pte_t __iomem *)ggtt->gsm;
        gte += vma->node.start / I915_GTT_PAGE_SIZE;
        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

        for_each_sgt_daddr(addr, iter, vma->pages)
                gen8_set_pte(gte++, pte_encode | addr);
        GEM_BUG_ON(gte > end);

        /* Fill the allocated but "unused" space beyond the end of the buffer */
        while (gte < end)
                gen8_set_pte(gte++, vm->scratch[0].encode);

        /*
         * We want to flush the TLBs only after we're certain all the PTE
         * updates have finished.
         */
        ggtt->invalidate(ggtt);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
                                  dma_addr_t addr,
                                  u64 offset,
                                  enum i915_cache_level level,
                                  u32 flags)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        gen6_pte_t __iomem *pte =
                (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;

        iowrite32(vm->pte_encode(addr, level, flags), pte);

        ggtt->invalidate(ggtt);
}

/*
 * Binds an object into the global gtt with the specified cache level.
 * The object will be accessible to the GPU via commands whose operands
 * reference offsets within the global GTT as well as accessible by the GPU
 * through the GMADR mapped BAR (i915->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma *vma,
                                     enum i915_cache_level level,
                                     u32 flags)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        gen6_pte_t __iomem *gte;
        gen6_pte_t __iomem *end;
        struct sgt_iter iter;
        dma_addr_t addr;

        gte = (gen6_pte_t __iomem *)ggtt->gsm;
        gte += vma->node.start / I915_GTT_PAGE_SIZE;
        end = gte + vma->node.size / I915_GTT_PAGE_SIZE;

        for_each_sgt_daddr(addr, iter, vma->pages)
                iowrite32(vm->pte_encode(addr, level, flags), gte++);
        GEM_BUG_ON(gte > end);

        /* Fill the allocated but "unused" space beyond the end of the buffer */
        while (gte < end)
                iowrite32(vm->scratch[0].encode, gte++);

        /*
         * We want to flush the TLBs only after we're certain all the PTE
         * updates have finished.
         */
        ggtt->invalidate(ggtt);
}

static void nop_clear_range(struct i915_address_space *vm,
                            u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
        const gen8_pte_t scratch_pte = vm->scratch[0].encode;
        gen8_pte_t __iomem *gtt_base =
                (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
        int i;

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        for (i = 0; i < num_entries; i++)
                gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
        /*
         * Make sure the internal GAM fifo has been cleared of all GTT
         * writes before exiting stop_machine(). This guarantees that
         * any aperture accesses waiting to start in another process
         * cannot back up behind the GTT writes causing a hang.
         * The register can be any arbitrary GAM register.
         */
        intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
}

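/*
 * The VT-d workaround above runs each GGTT update under stop_machine().
 * The structs and __cb callbacks below exist only to marshal the
 * arguments of the normal gen8 insert paths into that stop_machine()
 * call.
 */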
struct insert_page {
        struct i915_address_space *vm;
        dma_addr_t addr;
        u64 offset;
        enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
        struct insert_page *arg = _arg;

        gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
        bxt_vtd_ggtt_wa(arg->vm);

        return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
                                          dma_addr_t addr,
                                          u64 offset,
                                          enum i915_cache_level level,
                                          u32 unused)
{
        struct insert_page arg = { vm, addr, offset, level };

        stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
        struct i915_address_space *vm;
        struct i915_vma *vma;
        enum i915_cache_level level;
        u32 flags;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
        struct insert_entries *arg = _arg;

        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
        bxt_vtd_ggtt_wa(arg->vm);

        return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
                                             struct i915_vma *vma,
                                             enum i915_cache_level level,
                                             u32 flags)
{
        struct insert_entries arg = { vm, vma, level, flags };

        stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
        unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
        gen6_pte_t scratch_pte, __iomem *gtt_base =
                (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
        int i;

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        scratch_pte = vm->scratch[0].encode;
        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
}

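/*
 * On the older GMCH based platforms (gen5 and earlier) the GGTT is
 * owned by the intel-gtt helper library, so the hooks below simply
 * forward to the intel_gtt_*() entry points.
 */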
static void i915_ggtt_insert_page(struct i915_address_space *vm,
                                  dma_addr_t addr,
                                  u64 offset,
                                  enum i915_cache_level cache_level,
                                  u32 unused)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

        intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma *vma,
                                     enum i915_cache_level cache_level,
                                     u32 unused)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

        intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
                                    flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
{
        intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static int ggtt_bind_vma(struct i915_vma *vma,
                         enum i915_cache_level cache_level,
                         u32 flags)
{
        struct drm_i915_gem_object *obj = vma->obj;
        u32 pte_flags;

        /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
        pte_flags = 0;
        if (i915_gem_object_is_readonly(obj))
                pte_flags |= PTE_READ_ONLY;

        vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

        vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;

        /*
         * Without aliasing PPGTT there's no difference between
         * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
         * upgrade to both bound if we bind either to avoid double-binding.
         */
        atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);

        return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
        vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
{
        u64 size;
        int ret;

        if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
                return 0;

        GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
        size = ggtt->vm.total - GUC_GGTT_TOP;

        ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
                                   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
                                   PIN_NOEVICT);
        if (ret)
                drm_dbg(&ggtt->vm.i915->drm,
                        "Failed to reserve top of GGTT for GuC\n");

        return ret;
}

static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
{
        if (drm_mm_node_allocated(&ggtt->uc_fw))
                drm_mm_remove_node(&ggtt->uc_fw);
}

static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
{
        ggtt_release_guc_top(ggtt);
        if (drm_mm_node_allocated(&ggtt->error_capture))
                drm_mm_remove_node(&ggtt->error_capture);
        mutex_destroy(&ggtt->error_mutex);
}

static int init_ggtt(struct i915_ggtt *ggtt)
{
        /*
         * Let GEM Manage all of the aperture.
         *
         * However, leave one page at the end still bound to the scratch page.
         * There are a number of places where the hardware apparently prefetches
         * past the end of the object, and we've seen multiple hangs with the
         * GPU head pointer stuck in a batchbuffer bound at the last page of the
         * aperture. One page should be enough to keep any prefetching inside
         * of the aperture.
         */
        unsigned long hole_start, hole_end;
        struct drm_mm_node *entry;
        int ret;

        /*
         * GuC requires all resources that we're sharing with it to be placed in
         * non-WOPCM memory. If GuC is not present or not in use we still need a
         * small bias as ring wraparound at offset 0 sometimes hangs. No idea
         * why.
         */
        ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
                               intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));

        ret = intel_vgt_balloon(ggtt);
        if (ret)
                return ret;

        mutex_init(&ggtt->error_mutex);
        if (ggtt->mappable_end) {
                /* Reserve a mappable slot for our lockless error capture */
                ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
                                                  &ggtt->error_capture,
                                                  PAGE_SIZE, 0,
                                                  I915_COLOR_UNEVICTABLE,
                                                  0, ggtt->mappable_end,
                                                  DRM_MM_INSERT_LOW);
                if (ret)
                        return ret;
        }

        /*
         * The upper portion of the GuC address space has a sizeable hole
         * (several MB) that is inaccessible by GuC. Reserve this range within
         * GGTT as it can comfortably hold GuC/HuC firmware images.
         */
        ret = ggtt_reserve_guc_top(ggtt);
        if (ret)
                goto err;

        /* Clear any non-preallocated blocks */
        drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
                drm_dbg_kms(&ggtt->vm.i915->drm,
                            "clearing unused GTT space: [%lx, %lx]\n",
                            hole_start, hole_end);
                ggtt->vm.clear_range(&ggtt->vm, hole_start,
                                     hole_end - hole_start);
        }

        /* And finally clear the reserved guard page */
        ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);

        return 0;

err:
        cleanup_init_ggtt(ggtt);
        return ret;
}

static int aliasing_gtt_bind_vma(struct i915_vma *vma,
                                 enum i915_cache_level cache_level,
                                 u32 flags)
{
        u32 pte_flags;
        int ret;

        /* Currently applicable only to VLV */
        pte_flags = 0;
        if (i915_gem_object_is_readonly(vma->obj))
                pte_flags |= PTE_READ_ONLY;

        if (flags & I915_VMA_LOCAL_BIND) {
                struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;

                if (flags & I915_VMA_ALLOC) {
                        ret = alias->vm.allocate_va_range(&alias->vm,
                                                          vma->node.start,
                                                          vma->size);
                        if (ret)
                                return ret;

                        set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
                }

                GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
                                     __i915_vma_flags(vma)));
                alias->vm.insert_entries(&alias->vm, vma,
                                         cache_level, pte_flags);
        }

        if (flags & I915_VMA_GLOBAL_BIND)
                vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

        return 0;
}

static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
        if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
                struct i915_address_space *vm = vma->vm;

                vm->clear_range(vm, vma->node.start, vma->size);
        }

        if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
                struct i915_address_space *vm =
                        &i915_vm_to_ggtt(vma->vm)->alias->vm;

                vm->clear_range(vm, vma->node.start, vma->size);
        }
}

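/*
 * With an aliasing PPGTT the GGTT and the PPGTT cover the same address
 * range, so a LOCAL_BIND fills in the alias' PTEs while a GLOBAL_BIND
 * fills in the GGTT PTEs for the same offset. The hooks above replace
 * the plain ggtt_bind_vma()/ggtt_unbind_vma() once the alias is set up
 * below.
 */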
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
        struct i915_ppgtt *ppgtt;
        int err;

        ppgtt = i915_ppgtt_create(ggtt->vm.gt);
        if (IS_ERR(ppgtt))
                return PTR_ERR(ppgtt);

        if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
                err = -ENODEV;
                goto err_ppgtt;
        }

        /*
         * Note we only pre-allocate as far as the end of the global
         * GTT. On 48b / 4-level page-tables, the difference is very,
         * very significant! We have to preallocate as GVT/vgpu does
         * not like the page directory disappearing.
         */
        err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
        if (err)
                goto err_ppgtt;

        ggtt->alias = ppgtt;
        ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;

        GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
        ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;

        GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
        ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;

        return 0;

err_ppgtt:
        i915_vm_put(&ppgtt->vm);
        return err;
}

static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
        struct i915_ppgtt *ppgtt;

        ppgtt = fetch_and_zero(&ggtt->alias);
        if (!ppgtt)
                return;

        i915_vm_put(&ppgtt->vm);

        ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}

int i915_init_ggtt(struct drm_i915_private *i915)
{
        int ret;

        ret = init_ggtt(&i915->ggtt);
        if (ret)
                return ret;

        if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
                ret = init_aliasing_ppgtt(&i915->ggtt);
                if (ret)
                        cleanup_init_ggtt(&i915->ggtt);
        }

        return 0;
}

static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
        struct i915_vma *vma, *vn;

        atomic_set(&ggtt->vm.open, 0);

        rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
        flush_workqueue(ggtt->vm.i915->wq);

        mutex_lock(&ggtt->vm.mutex);

        list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
                WARN_ON(__i915_vma_unbind(vma));

        if (drm_mm_node_allocated(&ggtt->error_capture))
                drm_mm_remove_node(&ggtt->error_capture);
        mutex_destroy(&ggtt->error_mutex);

        ggtt_release_guc_top(ggtt);
        intel_vgt_deballoon(ggtt);

        ggtt->vm.cleanup(&ggtt->vm);

        mutex_unlock(&ggtt->vm.mutex);
        i915_address_space_fini(&ggtt->vm);

        arch_phys_wc_del(ggtt->mtrr);

        if (ggtt->iomap.size)
                io_mapping_fini(&ggtt->iomap);
}

/**
 * i915_ggtt_driver_release - Clean up GGTT hardware initialization
 * @i915: i915 device
 */
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
        struct pagevec *pvec;

        fini_aliasing_ppgtt(&i915->ggtt);

        ggtt_cleanup_hw(&i915->ggtt);

        pvec = &i915->mm.wc_stash.pvec;
        if (pvec->nr) {
                set_pages_array_wb(pvec->pages, pvec->nr);
                __pagevec_release(pvec);
        }
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
        return snb_gmch_ctl << 20;
}

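/*
 * Worked example for the BDW+ decode below (illustrative numbers): a
 * raw GGMS field of 0x3 gives 1 << 3 = 8, i.e. 8 MiB of PTE space; at
 * 8 bytes per gen8 PTE that is 1M entries, or 4 GiB of GGTT address
 * space once scaled by I915_GTT_PAGE_SIZE in gen8_gmch_probe().
 */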
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
        bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
        bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
        if (bdw_gmch_ctl)
                bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
        /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
        if (bdw_gmch_ctl > 4)
                bdw_gmch_ctl = 4;
#endif

        return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
        gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
        gmch_ctrl &= SNB_GMCH_GGMS_MASK;

        if (gmch_ctrl)
                return 1 << (20 + gmch_ctrl);

        return 0;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;
        struct pci_dev *pdev = i915->drm.pdev;
        phys_addr_t phys_addr;
        int ret;

        /* For Modern GENs the PTEs and register space are split in the BAR */
        phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

        /*
         * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
         * will be dropped. For WC mappings in general we have 64 byte burst
         * writes when the WC buffer is flushed, so we can't use it, but have to
         * resort to an uncached mapping. The WC issue is easily caught by the
         * readback check when writing GTT PTE entries.
         */
        if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
                ggtt->gsm = ioremap(phys_addr, size);
        else
                ggtt->gsm = ioremap_wc(phys_addr, size);
        if (!ggtt->gsm) {
                DRM_ERROR("Failed to map the ggtt page table\n");
                return -ENOMEM;
        }

        ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
        if (ret) {
                DRM_ERROR("Scratch setup failed\n");
                /* iounmap will also get called at remove, but meh */
                iounmap(ggtt->gsm);
                return ret;
        }

        ggtt->vm.scratch[0].encode =
                ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
                                    I915_CACHE_NONE, 0);

        return 0;
}

int ggtt_set_pages(struct i915_vma *vma)
{
        int ret;

        GEM_BUG_ON(vma->pages);

        ret = i915_get_ggtt_vma_pages(vma);
        if (ret)
                return ret;

        vma->page_sizes = vma->obj->mm.page_sizes;

        return 0;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

        iounmap(ggtt->gsm);
        cleanup_scratch_page(vm);
}

static struct resource pci_resource(struct pci_dev *pdev, int bar)
{
        return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
                                               pci_resource_len(pdev, bar));
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;
        struct pci_dev *pdev = i915->drm.pdev;
        unsigned int size;
        u16 snb_gmch_ctl;
        int err;

        /* TODO: We're not aware of mappable constraints on gen8 yet */
        if (!IS_DGFX(i915)) {
                ggtt->gmadr = pci_resource(pdev, 2);
                ggtt->mappable_end = resource_size(&ggtt->gmadr);
        }

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
        if (!err)
                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
        if (err)
                DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);

        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
        if (IS_CHERRYVIEW(i915))
                size = chv_get_total_gtt_size(snb_gmch_ctl);
        else
                size = gen8_get_total_gtt_size(snb_gmch_ctl);

        ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
        ggtt->vm.cleanup = gen6_gmch_remove;
        ggtt->vm.insert_page = gen8_ggtt_insert_page;
        ggtt->vm.clear_range = nop_clear_range;
        if (intel_scanout_needs_vtd_wa(i915))
                ggtt->vm.clear_range = gen8_ggtt_clear_range;

        ggtt->vm.insert_entries = gen8_ggtt_insert_entries;

        /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
        if (intel_ggtt_update_needs_vtd_wa(i915) ||
            IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
                ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
                ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
                ggtt->vm.bind_async_flags =
                        I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
        }

        ggtt->invalidate = gen8_ggtt_invalidate;

        ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
        ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
        ggtt->vm.vma_ops.clear_pages = clear_pages;

        ggtt->vm.pte_encode = gen8_ggtt_pte_encode;

        setup_private_pat(ggtt->vm.gt->uncore);

        return ggtt_probe_common(ggtt, size);
}

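/*
 * The helpers below build the 32-bit gen6/gen7 GGTT PTE: the page
 * address plus GEN6_PTE_VALID and platform specific cache-control
 * bits. For example, an LLC-cached page on SNB is encoded as
 * GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_CACHE_LLC | GEN6_PTE_VALID.
 */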
static u64 snb_pte_encode(dma_addr_t addr,
                          enum i915_cache_level level,
                          u32 flags)
{
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

        switch (level) {
        case I915_CACHE_L3_LLC:
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                MISSING_CASE(level);
        }

        return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
                          enum i915_cache_level level,
                          u32 flags)
{
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

        switch (level) {
        case I915_CACHE_L3_LLC:
                pte |= GEN7_PTE_CACHE_L3_LLC;
                break;
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                MISSING_CASE(level);
        }

        return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
                          enum i915_cache_level level,
                          u32 flags)
{
        gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

        if (!(flags & PTE_READ_ONLY))
                pte |= BYT_PTE_WRITEABLE;

        if (level != I915_CACHE_NONE)
                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

        return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
                          enum i915_cache_level level,
                          u32 flags)
{
        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

        if (level != I915_CACHE_NONE)
                pte |= HSW_WB_LLC_AGE3;

        return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
                           enum i915_cache_level level,
                           u32 flags)
{
        gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;

        switch (level) {
        case I915_CACHE_NONE:
                break;
        case I915_CACHE_WT:
                pte |= HSW_WT_ELLC_LLC_AGE3;
                break;
        default:
                pte |= HSW_WB_ELLC_LLC_AGE3;
                break;
        }

        return pte;
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;
        struct pci_dev *pdev = i915->drm.pdev;
        unsigned int size;
        u16 snb_gmch_ctl;
        int err;

        ggtt->gmadr = pci_resource(pdev, 2);
        ggtt->mappable_end = resource_size(&ggtt->gmadr);

        /*
         * 64/512MB is the current min/max we actually know of, but this is
         * just a coarse sanity check.
         */
        if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
                DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
                return -ENXIO;
        }

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
        if (!err)
                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
        if (err)
                DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
        pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

        size = gen6_get_total_gtt_size(snb_gmch_ctl);
        ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;

        ggtt->vm.clear_range = nop_clear_range;
        if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
                ggtt->vm.clear_range = gen6_ggtt_clear_range;
        ggtt->vm.insert_page = gen6_ggtt_insert_page;
        ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
        ggtt->vm.cleanup = gen6_gmch_remove;

        ggtt->invalidate = gen6_ggtt_invalidate;

        if (HAS_EDRAM(i915))
                ggtt->vm.pte_encode = iris_pte_encode;
        else if (IS_HASWELL(i915))
                ggtt->vm.pte_encode = hsw_pte_encode;
        else if (IS_VALLEYVIEW(i915))
                ggtt->vm.pte_encode = byt_pte_encode;
        else if (INTEL_GEN(i915) >= 7)
                ggtt->vm.pte_encode = ivb_pte_encode;
        else
                ggtt->vm.pte_encode = snb_pte_encode;

        ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
        ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
        ggtt->vm.vma_ops.clear_pages = clear_pages;

        return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
        intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
        struct drm_i915_private *i915 = ggtt->vm.i915;
        phys_addr_t gmadr_base;
        int ret;

        ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
        if (!ret) {
                DRM_ERROR("failed to set up gmch\n");
                return -EIO;
        }

        intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);

        ggtt->gmadr =
                (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);

        ggtt->do_idle_maps = needs_idle_maps(i915);
        ggtt->vm.insert_page = i915_ggtt_insert_page;
        ggtt->vm.insert_entries = i915_ggtt_insert_entries;
        ggtt->vm.clear_range = i915_ggtt_clear_range;
        ggtt->vm.cleanup = i915_gmch_remove;

        ggtt->invalidate = gmch_ggtt_invalidate;

        ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
        ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
        ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
        ggtt->vm.vma_ops.clear_pages = clear_pages;

        if (unlikely(ggtt->do_idle_maps))
                dev_notice(i915->drm.dev,
                           "Applying Ironlake quirks for intel_iommu\n");

        return 0;
}

static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        int ret;

        ggtt->vm.gt = gt;
        ggtt->vm.i915 = i915;
        ggtt->vm.dma = &i915->drm.pdev->dev;

        if (INTEL_GEN(i915) <= 5)
                ret = i915_gmch_probe(ggtt);
        else if (INTEL_GEN(i915) < 8)
                ret = gen6_gmch_probe(ggtt);
        else
                ret = gen8_gmch_probe(ggtt);
        if (ret)
                return ret;

        if ((ggtt->vm.total - 1) >> 32) {
                DRM_ERROR("We never expected a Global GTT with more than 32bits"
                          " of address space! Found %lldM!\n",
                          ggtt->vm.total >> 20);
                ggtt->vm.total = 1ULL << 32;
                ggtt->mappable_end =
                        min_t(u64, ggtt->mappable_end, ggtt->vm.total);
        }

        if (ggtt->mappable_end > ggtt->vm.total) {
                DRM_ERROR("mappable aperture extends past end of GGTT,"
                          " aperture=%pa, total=%llx\n",
                          &ggtt->mappable_end, ggtt->vm.total);
                ggtt->mappable_end = ggtt->vm.total;
        }

        /* GMADR is the PCI mmio aperture into the global GTT. */
        DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
        DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
        DRM_DEBUG_DRIVER("DSM size = %lluM\n",
                         (u64)resource_size(&intel_graphics_stolen_res) >> 20);

        return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @i915: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
        int ret;

        ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
        if (ret)
                return ret;

        if (intel_vtd_active())
                dev_info(i915->drm.dev, "VT-d active for gfx access\n");

        return 0;
}

int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
        if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
                return -EIO;

        return 0;
}

void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
        GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);

        ggtt->invalidate = guc_ggtt_invalidate;

        ggtt->invalidate(ggtt);
}

void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
        /* XXX Temporary pardon for error unload */
        if (ggtt->invalidate == gen8_ggtt_invalidate)
                return;

        /* We should only be called after i915_ggtt_enable_guc() */
        GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);

        ggtt->invalidate = gen8_ggtt_invalidate;

        ggtt->invalidate(ggtt);
}

void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
        struct i915_vma *vma;
        bool flush = false;
        int open;

        intel_gt_check_and_clear_faults(ggtt->vm.gt);

        /* First fill our portion of the GTT with scratch pages */
        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);

        /* Skip rewriting PTE on VMA unbind. */
        open = atomic_xchg(&ggtt->vm.open, 0);

        /* clflush objects bound into the GGTT and rebind them. */
        list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
                struct drm_i915_gem_object *obj = vma->obj;

                if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
                        continue;

                clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
                WARN_ON(i915_vma_bind(vma,
                                      obj ? obj->cache_level : 0,
                                      PIN_GLOBAL, NULL));
                if (obj) { /* only used during resume => exclusive access */
                        flush |= fetch_and_zero(&obj->write_domain);
                        obj->read_domains |= I915_GEM_DOMAIN_GTT;
                }
        }

        atomic_set(&ggtt->vm.open, open);
        ggtt->invalidate(ggtt);

        if (flush)
                wbinvd_on_all_cpus();

        if (INTEL_GEN(ggtt->vm.i915) >= 8)
                setup_private_pat(ggtt->vm.gt->uncore);
}

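/*
 * Illustration (hypothetical 2x2 tile view, stride 4, offset 0):
 * rotate_pages() emits one page-sized sg entry per tile, column by
 * column and bottom row first, i.e. source pages 4, 0, 5, 1, which is
 * the layout the rotated GGTT view uses for 90/270 degree scanout.
 */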
static struct scatterlist *
rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
             unsigned int width, unsigned int height,
             unsigned int stride,
             struct sg_table *st, struct scatterlist *sg)
{
        unsigned int column, row;
        unsigned int src_idx;

        for (column = 0; column < width; column++) {
                src_idx = stride * (height - 1) + column + offset;
                for (row = 0; row < height; row++) {
                        st->nents++;
                        /*
                         * We don't need the pages, but need to initialize
                         * the entries so the sg list can be happily traversed.
                         * The only thing we need are DMA addresses.
                         */
                        sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
                        sg_dma_address(sg) =
                                i915_gem_object_get_dma_address(obj, src_idx);
                        sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
                        sg = sg_next(sg);
                        src_idx -= stride;
                }
        }

        return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
                   struct drm_i915_gem_object *obj)
{
        unsigned int size = intel_rotation_info_size(rot_info);
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct sg_table *st;
        struct scatterlist *sg;
        int ret = -ENOMEM;
        int i;

        /* Allocate target SG list. */
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;

        ret = sg_alloc_table(st, size, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;

        st->nents = 0;
        sg = st->sgl;

        for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
                sg = rotate_pages(obj, rot_info->plane[i].offset,
                                  rot_info->plane[i].width, rot_info->plane[i].height,
                                  rot_info->plane[i].stride, st, sg);
        }

        return st;

err_sg_alloc:
        kfree(st);
err_st_alloc:

        drm_dbg(&i915->drm, "Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
                obj->base.size, rot_info->plane[0].width,
                rot_info->plane[0].height, size);

        return ERR_PTR(ret);
}

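/*
 * Illustration (hypothetical remapped view: width 2, height 2, stride
 * 4, offset 0): remap_pages() walks row by row, emitting pages {0, 1}
 * and then {4, 5}, merging each run into a single sg entry when the
 * DMA addresses happen to be contiguous and skipping the
 * stride - width pages between rows.
 */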
static struct scatterlist *
remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
            unsigned int width, unsigned int height,
            unsigned int stride,
            struct sg_table *st, struct scatterlist *sg)
{
        unsigned int row;

        for (row = 0; row < height; row++) {
                unsigned int left = width * I915_GTT_PAGE_SIZE;

                while (left) {
                        dma_addr_t addr;
                        unsigned int length;

                        /*
                         * We don't need the pages, but need to initialize
                         * the entries so the sg list can be happily traversed.
                         * The only thing we need are DMA addresses.
                         */

                        addr = i915_gem_object_get_dma_address_len(obj, offset, &length);

                        length = min(left, length);

                        st->nents++;

                        sg_set_page(sg, NULL, length, 0);
                        sg_dma_address(sg) = addr;
                        sg_dma_len(sg) = length;
                        sg = sg_next(sg);

                        offset += length / I915_GTT_PAGE_SIZE;
                        left -= length;
                }

                offset += stride - width;
        }

        return sg;
}

static noinline struct sg_table *
intel_remap_pages(struct intel_remapped_info *rem_info,
                  struct drm_i915_gem_object *obj)
{
        unsigned int size = intel_remapped_info_size(rem_info);
        struct drm_i915_private *i915 = to_i915(obj->base.dev);
        struct sg_table *st;
        struct scatterlist *sg;
        int ret = -ENOMEM;
        int i;

        /* Allocate target SG list. */
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;

        ret = sg_alloc_table(st, size, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;

        st->nents = 0;
        sg = st->sgl;

        for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
                sg = remap_pages(obj, rem_info->plane[i].offset,
                                 rem_info->plane[i].width, rem_info->plane[i].height,
                                 rem_info->plane[i].stride, st, sg);
        }

        i915_sg_trim(st);

        return st;

err_sg_alloc:
        kfree(st);
err_st_alloc:

        drm_dbg(&i915->drm, "Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
                obj->base.size, rem_info->plane[0].width,
                rem_info->plane[0].height, size);

        return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
                    struct drm_i915_gem_object *obj)
{
        struct sg_table *st;
        struct scatterlist *sg, *iter;
        unsigned int count = view->partial.size;
        unsigned int offset;
        int ret = -ENOMEM;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;

        ret = sg_alloc_table(st, count, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;

        iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
        GEM_BUG_ON(!iter);

        sg = st->sgl;
        st->nents = 0;
        do {
                unsigned int len;

                len = min(iter->length - (offset << PAGE_SHIFT),
                          count << PAGE_SHIFT);
                sg_set_page(sg, NULL, len, 0);
                sg_dma_address(sg) =
                        sg_dma_address(iter) + (offset << PAGE_SHIFT);
                sg_dma_len(sg) = len;

                st->nents++;
                count -= len >> PAGE_SHIFT;
                if (count == 0) {
                        sg_mark_end(sg);
                        i915_sg_trim(st); /* Drop any unused tail entries. */

                        return st;
                }

                sg = __sg_next(sg);
                iter = __sg_next(iter);
                offset = 0;
        } while (1);

err_sg_alloc:
        kfree(st);
err_st_alloc:
        return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
        int ret;

        /*
         * The vma->pages are only valid within the lifespan of the borrowed
         * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
         * must be the vma->pages. A simple rule is that vma->pages must only
         * be accessed when the obj->mm.pages are pinned.
         */
        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

        switch (vma->ggtt_view.type) {
        default:
                GEM_BUG_ON(vma->ggtt_view.type);
                /* fall through */
        case I915_GGTT_VIEW_NORMAL:
                vma->pages = vma->obj->mm.pages;
                return 0;

        case I915_GGTT_VIEW_ROTATED:
                vma->pages =
                        intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
                break;

        case I915_GGTT_VIEW_REMAPPED:
                vma->pages =
                        intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
                break;

        case I915_GGTT_VIEW_PARTIAL:
                vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
                break;
        }

        ret = 0;
        if (IS_ERR(vma->pages)) {
                ret = PTR_ERR(vma->pages);
                vma->pages = NULL;
                drm_err(&vma->vm->i915->drm,
                        "Failed to get pages for VMA view type %u (%d)!\n",
                        vma->ggtt_view.type, ret);
        }
        return ret;
}