1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/slab.h> /* fault-inject.h is not standalone! */ 27 28 #include <linux/fault-inject.h> 29 #include <linux/log2.h> 30 #include <linux/random.h> 31 #include <linux/seq_file.h> 32 #include <linux/stop_machine.h> 33 34 #include <drm/drmP.h> 35 #include <drm/i915_drm.h> 36 37 #include "i915_drv.h" 38 #include "i915_vgpu.h" 39 #include "i915_trace.h" 40 #include "intel_drv.h" 41 #include "intel_frontbuffer.h" 42 43 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) 44 45 /** 46 * DOC: Global GTT views 47 * 48 * Background and previous state 49 * 50 * Historically objects could exists (be bound) in global GTT space only as 51 * singular instances with a view representing all of the object's backing pages 52 * in a linear fashion. This view will be called a normal view. 53 * 54 * To support multiple views of the same object, where the number of mapped 55 * pages is not equal to the backing store, or where the layout of the pages 56 * is not linear, concept of a GGTT view was added. 57 * 58 * One example of an alternative view is a stereo display driven by a single 59 * image. In this case we would have a framebuffer looking like this 60 * (2x2 pages): 61 * 62 * 12 63 * 34 64 * 65 * Above would represent a normal GGTT view as normally mapped for GPU or CPU 66 * rendering. In contrast, fed to the display engine would be an alternative 67 * view which could look something like this: 68 * 69 * 1212 70 * 3434 71 * 72 * In this example both the size and layout of pages in the alternative view is 73 * different from the normal view. 74 * 75 * Implementation and usage 76 * 77 * GGTT views are implemented using VMAs and are distinguished via enum 78 * i915_ggtt_view_type and struct i915_ggtt_view. 79 * 80 * A new flavour of core GEM functions which work with GGTT bound objects were 81 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid 82 * renaming in large amounts of code. They take the struct i915_ggtt_view 83 * parameter encapsulating all metadata required to implement a view. 84 * 85 * As a helper for callers which are only interested in the normal view, 86 * globally const i915_ggtt_view_normal singleton instance exists. All old core 87 * GEM API functions, the ones not taking the view parameter, are operating on, 88 * or with the normal GGTT view. 89 * 90 * Code wanting to add or use a new GGTT view needs to: 91 * 92 * 1. Add a new enum with a suitable name. 93 * 2. Extend the metadata in the i915_ggtt_view structure if required. 94 * 3. Add support to i915_get_vma_pages(). 95 * 96 * New views are required to build a scatter-gather table from within the 97 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 98 * exists for the lifetime of an VMA. 99 * 100 * Core API is designed to have copy semantics which means that passed in 101 * struct i915_ggtt_view does not need to be persistent (left around after 102 * calling the core API functions). 103 * 104 */ 105 106 static int 107 i915_get_ggtt_vma_pages(struct i915_vma *vma); 108 109 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) 110 { 111 /* Note that as an uncached mmio write, this should flush the 112 * WCB of the writes into the GGTT before it triggers the invalidate. 113 */ 114 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 115 } 116 117 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv) 118 { 119 gen6_ggtt_invalidate(dev_priv); 120 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); 121 } 122 123 static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv) 124 { 125 intel_gtt_chipset_flush(); 126 } 127 128 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915) 129 { 130 i915->ggtt.invalidate(i915); 131 } 132 133 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 134 int enable_ppgtt) 135 { 136 bool has_aliasing_ppgtt; 137 bool has_full_ppgtt; 138 bool has_full_48bit_ppgtt; 139 140 has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt; 141 has_full_ppgtt = dev_priv->info.has_full_ppgtt; 142 has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt; 143 144 if (intel_vgpu_active(dev_priv)) { 145 /* emulation is too hard */ 146 has_full_ppgtt = false; 147 has_full_48bit_ppgtt = false; 148 } 149 150 if (!has_aliasing_ppgtt) 151 return 0; 152 153 /* 154 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 155 * execlists, the sole mechanism available to submit work. 156 */ 157 if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) 158 return 0; 159 160 if (enable_ppgtt == 1) 161 return 1; 162 163 if (enable_ppgtt == 2 && has_full_ppgtt) 164 return 2; 165 166 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 167 return 3; 168 169 #ifdef CONFIG_INTEL_IOMMU 170 /* Disable ppgtt on SNB if VT-d is on. */ 171 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 172 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 173 return 0; 174 } 175 #endif 176 177 /* Early VLV doesn't have this */ 178 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 179 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 180 return 0; 181 } 182 183 if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) 184 return has_full_48bit_ppgtt ? 3 : 2; 185 else 186 return has_aliasing_ppgtt ? 1 : 0; 187 } 188 189 static int ppgtt_bind_vma(struct i915_vma *vma, 190 enum i915_cache_level cache_level, 191 u32 unused) 192 { 193 u32 pte_flags; 194 int ret; 195 196 ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); 197 if (ret) 198 return ret; 199 200 vma->pages = vma->obj->mm.pages; 201 202 /* Currently applicable only to VLV */ 203 pte_flags = 0; 204 if (vma->obj->gt_ro) 205 pte_flags |= PTE_READ_ONLY; 206 207 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 208 cache_level, pte_flags); 209 210 return 0; 211 } 212 213 static void ppgtt_unbind_vma(struct i915_vma *vma) 214 { 215 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 216 } 217 218 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 219 enum i915_cache_level level) 220 { 221 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 222 pte |= addr; 223 224 switch (level) { 225 case I915_CACHE_NONE: 226 pte |= PPAT_UNCACHED_INDEX; 227 break; 228 case I915_CACHE_WT: 229 pte |= PPAT_DISPLAY_ELLC_INDEX; 230 break; 231 default: 232 pte |= PPAT_CACHED_INDEX; 233 break; 234 } 235 236 return pte; 237 } 238 239 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 240 const enum i915_cache_level level) 241 { 242 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 243 pde |= addr; 244 if (level != I915_CACHE_NONE) 245 pde |= PPAT_CACHED_PDE_INDEX; 246 else 247 pde |= PPAT_UNCACHED_INDEX; 248 return pde; 249 } 250 251 #define gen8_pdpe_encode gen8_pde_encode 252 #define gen8_pml4e_encode gen8_pde_encode 253 254 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 255 enum i915_cache_level level, 256 u32 unused) 257 { 258 gen6_pte_t pte = GEN6_PTE_VALID; 259 pte |= GEN6_PTE_ADDR_ENCODE(addr); 260 261 switch (level) { 262 case I915_CACHE_L3_LLC: 263 case I915_CACHE_LLC: 264 pte |= GEN6_PTE_CACHE_LLC; 265 break; 266 case I915_CACHE_NONE: 267 pte |= GEN6_PTE_UNCACHED; 268 break; 269 default: 270 MISSING_CASE(level); 271 } 272 273 return pte; 274 } 275 276 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 277 enum i915_cache_level level, 278 u32 unused) 279 { 280 gen6_pte_t pte = GEN6_PTE_VALID; 281 pte |= GEN6_PTE_ADDR_ENCODE(addr); 282 283 switch (level) { 284 case I915_CACHE_L3_LLC: 285 pte |= GEN7_PTE_CACHE_L3_LLC; 286 break; 287 case I915_CACHE_LLC: 288 pte |= GEN6_PTE_CACHE_LLC; 289 break; 290 case I915_CACHE_NONE: 291 pte |= GEN6_PTE_UNCACHED; 292 break; 293 default: 294 MISSING_CASE(level); 295 } 296 297 return pte; 298 } 299 300 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 301 enum i915_cache_level level, 302 u32 flags) 303 { 304 gen6_pte_t pte = GEN6_PTE_VALID; 305 pte |= GEN6_PTE_ADDR_ENCODE(addr); 306 307 if (!(flags & PTE_READ_ONLY)) 308 pte |= BYT_PTE_WRITEABLE; 309 310 if (level != I915_CACHE_NONE) 311 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 312 313 return pte; 314 } 315 316 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 317 enum i915_cache_level level, 318 u32 unused) 319 { 320 gen6_pte_t pte = GEN6_PTE_VALID; 321 pte |= HSW_PTE_ADDR_ENCODE(addr); 322 323 if (level != I915_CACHE_NONE) 324 pte |= HSW_WB_LLC_AGE3; 325 326 return pte; 327 } 328 329 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 330 enum i915_cache_level level, 331 u32 unused) 332 { 333 gen6_pte_t pte = GEN6_PTE_VALID; 334 pte |= HSW_PTE_ADDR_ENCODE(addr); 335 336 switch (level) { 337 case I915_CACHE_NONE: 338 break; 339 case I915_CACHE_WT: 340 pte |= HSW_WT_ELLC_LLC_AGE3; 341 break; 342 default: 343 pte |= HSW_WB_ELLC_LLC_AGE3; 344 break; 345 } 346 347 return pte; 348 } 349 350 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) 351 { 352 struct page *page; 353 354 if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) 355 i915_gem_shrink_all(vm->i915); 356 357 if (vm->free_pages.nr) 358 return vm->free_pages.pages[--vm->free_pages.nr]; 359 360 page = alloc_page(gfp); 361 if (!page) 362 return NULL; 363 364 if (vm->pt_kmap_wc) 365 set_pages_array_wc(&page, 1); 366 367 return page; 368 } 369 370 static void vm_free_pages_release(struct i915_address_space *vm) 371 { 372 GEM_BUG_ON(!pagevec_count(&vm->free_pages)); 373 374 if (vm->pt_kmap_wc) 375 set_pages_array_wb(vm->free_pages.pages, 376 pagevec_count(&vm->free_pages)); 377 378 __pagevec_release(&vm->free_pages); 379 } 380 381 static void vm_free_page(struct i915_address_space *vm, struct page *page) 382 { 383 if (!pagevec_add(&vm->free_pages, page)) 384 vm_free_pages_release(vm); 385 } 386 387 static int __setup_page_dma(struct i915_address_space *vm, 388 struct i915_page_dma *p, 389 gfp_t gfp) 390 { 391 p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); 392 if (unlikely(!p->page)) 393 return -ENOMEM; 394 395 p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, 396 PCI_DMA_BIDIRECTIONAL); 397 if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { 398 vm_free_page(vm, p->page); 399 return -ENOMEM; 400 } 401 402 return 0; 403 } 404 405 static int setup_page_dma(struct i915_address_space *vm, 406 struct i915_page_dma *p) 407 { 408 return __setup_page_dma(vm, p, I915_GFP_DMA); 409 } 410 411 static void cleanup_page_dma(struct i915_address_space *vm, 412 struct i915_page_dma *p) 413 { 414 dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 415 vm_free_page(vm, p->page); 416 } 417 418 #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) 419 420 #define setup_px(vm, px) setup_page_dma((vm), px_base(px)) 421 #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) 422 #define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) 423 #define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) 424 425 static void fill_page_dma(struct i915_address_space *vm, 426 struct i915_page_dma *p, 427 const u64 val) 428 { 429 u64 * const vaddr = kmap_atomic(p->page); 430 int i; 431 432 for (i = 0; i < 512; i++) 433 vaddr[i] = val; 434 435 kunmap_atomic(vaddr); 436 } 437 438 static void fill_page_dma_32(struct i915_address_space *vm, 439 struct i915_page_dma *p, 440 const u32 v) 441 { 442 fill_page_dma(vm, p, (u64)v << 32 | v); 443 } 444 445 static int 446 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) 447 { 448 return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); 449 } 450 451 static void cleanup_scratch_page(struct i915_address_space *vm) 452 { 453 cleanup_page_dma(vm, &vm->scratch_page); 454 } 455 456 static struct i915_page_table *alloc_pt(struct i915_address_space *vm) 457 { 458 struct i915_page_table *pt; 459 460 pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); 461 if (unlikely(!pt)) 462 return ERR_PTR(-ENOMEM); 463 464 if (unlikely(setup_px(vm, pt))) { 465 kfree(pt); 466 return ERR_PTR(-ENOMEM); 467 } 468 469 pt->used_ptes = 0; 470 return pt; 471 } 472 473 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) 474 { 475 cleanup_px(vm, pt); 476 kfree(pt); 477 } 478 479 static void gen8_initialize_pt(struct i915_address_space *vm, 480 struct i915_page_table *pt) 481 { 482 fill_px(vm, pt, 483 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); 484 } 485 486 static void gen6_initialize_pt(struct i915_address_space *vm, 487 struct i915_page_table *pt) 488 { 489 fill32_px(vm, pt, 490 vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); 491 } 492 493 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) 494 { 495 struct i915_page_directory *pd; 496 497 pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); 498 if (unlikely(!pd)) 499 return ERR_PTR(-ENOMEM); 500 501 if (unlikely(setup_px(vm, pd))) { 502 kfree(pd); 503 return ERR_PTR(-ENOMEM); 504 } 505 506 pd->used_pdes = 0; 507 return pd; 508 } 509 510 static void free_pd(struct i915_address_space *vm, 511 struct i915_page_directory *pd) 512 { 513 cleanup_px(vm, pd); 514 kfree(pd); 515 } 516 517 static void gen8_initialize_pd(struct i915_address_space *vm, 518 struct i915_page_directory *pd) 519 { 520 unsigned int i; 521 522 fill_px(vm, pd, 523 gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); 524 for (i = 0; i < I915_PDES; i++) 525 pd->page_table[i] = vm->scratch_pt; 526 } 527 528 static int __pdp_init(struct i915_address_space *vm, 529 struct i915_page_directory_pointer *pdp) 530 { 531 const unsigned int pdpes = i915_pdpes_per_pdp(vm); 532 unsigned int i; 533 534 pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), 535 GFP_KERNEL | __GFP_NOWARN); 536 if (unlikely(!pdp->page_directory)) 537 return -ENOMEM; 538 539 for (i = 0; i < pdpes; i++) 540 pdp->page_directory[i] = vm->scratch_pd; 541 542 return 0; 543 } 544 545 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 546 { 547 kfree(pdp->page_directory); 548 pdp->page_directory = NULL; 549 } 550 551 static inline bool use_4lvl(const struct i915_address_space *vm) 552 { 553 return i915_vm_is_48bit(vm); 554 } 555 556 static struct i915_page_directory_pointer * 557 alloc_pdp(struct i915_address_space *vm) 558 { 559 struct i915_page_directory_pointer *pdp; 560 int ret = -ENOMEM; 561 562 WARN_ON(!use_4lvl(vm)); 563 564 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 565 if (!pdp) 566 return ERR_PTR(-ENOMEM); 567 568 ret = __pdp_init(vm, pdp); 569 if (ret) 570 goto fail_bitmap; 571 572 ret = setup_px(vm, pdp); 573 if (ret) 574 goto fail_page_m; 575 576 return pdp; 577 578 fail_page_m: 579 __pdp_fini(pdp); 580 fail_bitmap: 581 kfree(pdp); 582 583 return ERR_PTR(ret); 584 } 585 586 static void free_pdp(struct i915_address_space *vm, 587 struct i915_page_directory_pointer *pdp) 588 { 589 __pdp_fini(pdp); 590 591 if (!use_4lvl(vm)) 592 return; 593 594 cleanup_px(vm, pdp); 595 kfree(pdp); 596 } 597 598 static void gen8_initialize_pdp(struct i915_address_space *vm, 599 struct i915_page_directory_pointer *pdp) 600 { 601 gen8_ppgtt_pdpe_t scratch_pdpe; 602 603 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 604 605 fill_px(vm, pdp, scratch_pdpe); 606 } 607 608 static void gen8_initialize_pml4(struct i915_address_space *vm, 609 struct i915_pml4 *pml4) 610 { 611 unsigned int i; 612 613 fill_px(vm, pml4, 614 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); 615 for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) 616 pml4->pdps[i] = vm->scratch_pdp; 617 } 618 619 /* Broadwell Page Directory Pointer Descriptors */ 620 static int gen8_write_pdp(struct drm_i915_gem_request *req, 621 unsigned entry, 622 dma_addr_t addr) 623 { 624 struct intel_engine_cs *engine = req->engine; 625 u32 *cs; 626 627 BUG_ON(entry >= 4); 628 629 cs = intel_ring_begin(req, 6); 630 if (IS_ERR(cs)) 631 return PTR_ERR(cs); 632 633 *cs++ = MI_LOAD_REGISTER_IMM(1); 634 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); 635 *cs++ = upper_32_bits(addr); 636 *cs++ = MI_LOAD_REGISTER_IMM(1); 637 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); 638 *cs++ = lower_32_bits(addr); 639 intel_ring_advance(req, cs); 640 641 return 0; 642 } 643 644 static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, 645 struct drm_i915_gem_request *req) 646 { 647 int i, ret; 648 649 for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { 650 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 651 652 ret = gen8_write_pdp(req, i, pd_daddr); 653 if (ret) 654 return ret; 655 } 656 657 return 0; 658 } 659 660 static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, 661 struct drm_i915_gem_request *req) 662 { 663 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 664 } 665 666 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 667 * the page table structures, we mark them dirty so that 668 * context switching/execlist queuing code takes extra steps 669 * to ensure that tlbs are flushed. 670 */ 671 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 672 { 673 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask; 674 } 675 676 /* Removes entries from a single page table, releasing it if it's empty. 677 * Caller can use the return value to update higher-level entries. 678 */ 679 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, 680 struct i915_page_table *pt, 681 u64 start, u64 length) 682 { 683 unsigned int num_entries = gen8_pte_count(start, length); 684 unsigned int pte = gen8_pte_index(start); 685 unsigned int pte_end = pte + num_entries; 686 const gen8_pte_t scratch_pte = 687 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); 688 gen8_pte_t *vaddr; 689 690 GEM_BUG_ON(num_entries > pt->used_ptes); 691 692 pt->used_ptes -= num_entries; 693 if (!pt->used_ptes) 694 return true; 695 696 vaddr = kmap_atomic_px(pt); 697 while (pte < pte_end) 698 vaddr[pte++] = scratch_pte; 699 kunmap_atomic(vaddr); 700 701 return false; 702 } 703 704 static void gen8_ppgtt_set_pde(struct i915_address_space *vm, 705 struct i915_page_directory *pd, 706 struct i915_page_table *pt, 707 unsigned int pde) 708 { 709 gen8_pde_t *vaddr; 710 711 pd->page_table[pde] = pt; 712 713 vaddr = kmap_atomic_px(pd); 714 vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); 715 kunmap_atomic(vaddr); 716 } 717 718 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, 719 struct i915_page_directory *pd, 720 u64 start, u64 length) 721 { 722 struct i915_page_table *pt; 723 u32 pde; 724 725 gen8_for_each_pde(pt, pd, start, length, pde) { 726 GEM_BUG_ON(pt == vm->scratch_pt); 727 728 if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) 729 continue; 730 731 gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); 732 GEM_BUG_ON(!pd->used_pdes); 733 pd->used_pdes--; 734 735 free_pt(vm, pt); 736 } 737 738 return !pd->used_pdes; 739 } 740 741 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, 742 struct i915_page_directory_pointer *pdp, 743 struct i915_page_directory *pd, 744 unsigned int pdpe) 745 { 746 gen8_ppgtt_pdpe_t *vaddr; 747 748 pdp->page_directory[pdpe] = pd; 749 if (!use_4lvl(vm)) 750 return; 751 752 vaddr = kmap_atomic_px(pdp); 753 vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 754 kunmap_atomic(vaddr); 755 } 756 757 /* Removes entries from a single page dir pointer, releasing it if it's empty. 758 * Caller can use the return value to update higher-level entries 759 */ 760 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, 761 struct i915_page_directory_pointer *pdp, 762 u64 start, u64 length) 763 { 764 struct i915_page_directory *pd; 765 unsigned int pdpe; 766 767 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 768 GEM_BUG_ON(pd == vm->scratch_pd); 769 770 if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) 771 continue; 772 773 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); 774 GEM_BUG_ON(!pdp->used_pdpes); 775 pdp->used_pdpes--; 776 777 free_pd(vm, pd); 778 } 779 780 return !pdp->used_pdpes; 781 } 782 783 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, 784 u64 start, u64 length) 785 { 786 gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); 787 } 788 789 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, 790 struct i915_page_directory_pointer *pdp, 791 unsigned int pml4e) 792 { 793 gen8_ppgtt_pml4e_t *vaddr; 794 795 pml4->pdps[pml4e] = pdp; 796 797 vaddr = kmap_atomic_px(pml4); 798 vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 799 kunmap_atomic(vaddr); 800 } 801 802 /* Removes entries from a single pml4. 803 * This is the top-level structure in 4-level page tables used on gen8+. 804 * Empty entries are always scratch pml4e. 805 */ 806 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, 807 u64 start, u64 length) 808 { 809 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 810 struct i915_pml4 *pml4 = &ppgtt->pml4; 811 struct i915_page_directory_pointer *pdp; 812 unsigned int pml4e; 813 814 GEM_BUG_ON(!use_4lvl(vm)); 815 816 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 817 GEM_BUG_ON(pdp == vm->scratch_pdp); 818 819 if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) 820 continue; 821 822 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); 823 824 free_pdp(vm, pdp); 825 } 826 } 827 828 struct sgt_dma { 829 struct scatterlist *sg; 830 dma_addr_t dma, max; 831 }; 832 833 struct gen8_insert_pte { 834 u16 pml4e; 835 u16 pdpe; 836 u16 pde; 837 u16 pte; 838 }; 839 840 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) 841 { 842 return (struct gen8_insert_pte) { 843 gen8_pml4e_index(start), 844 gen8_pdpe_index(start), 845 gen8_pde_index(start), 846 gen8_pte_index(start), 847 }; 848 } 849 850 static __always_inline bool 851 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, 852 struct i915_page_directory_pointer *pdp, 853 struct sgt_dma *iter, 854 struct gen8_insert_pte *idx, 855 enum i915_cache_level cache_level) 856 { 857 struct i915_page_directory *pd; 858 const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); 859 gen8_pte_t *vaddr; 860 bool ret; 861 862 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); 863 pd = pdp->page_directory[idx->pdpe]; 864 vaddr = kmap_atomic_px(pd->page_table[idx->pde]); 865 do { 866 vaddr[idx->pte] = pte_encode | iter->dma; 867 868 iter->dma += PAGE_SIZE; 869 if (iter->dma >= iter->max) { 870 iter->sg = __sg_next(iter->sg); 871 if (!iter->sg) { 872 ret = false; 873 break; 874 } 875 876 iter->dma = sg_dma_address(iter->sg); 877 iter->max = iter->dma + iter->sg->length; 878 } 879 880 if (++idx->pte == GEN8_PTES) { 881 idx->pte = 0; 882 883 if (++idx->pde == I915_PDES) { 884 idx->pde = 0; 885 886 /* Limited by sg length for 3lvl */ 887 if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { 888 idx->pdpe = 0; 889 ret = true; 890 break; 891 } 892 893 GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); 894 pd = pdp->page_directory[idx->pdpe]; 895 } 896 897 kunmap_atomic(vaddr); 898 vaddr = kmap_atomic_px(pd->page_table[idx->pde]); 899 } 900 } while (1); 901 kunmap_atomic(vaddr); 902 903 return ret; 904 } 905 906 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, 907 struct sg_table *pages, 908 u64 start, 909 enum i915_cache_level cache_level, 910 u32 unused) 911 { 912 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 913 struct sgt_dma iter = { 914 .sg = pages->sgl, 915 .dma = sg_dma_address(iter.sg), 916 .max = iter.dma + iter.sg->length, 917 }; 918 struct gen8_insert_pte idx = gen8_insert_pte(start); 919 920 gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, 921 cache_level); 922 } 923 924 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, 925 struct sg_table *pages, 926 u64 start, 927 enum i915_cache_level cache_level, 928 u32 unused) 929 { 930 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 931 struct sgt_dma iter = { 932 .sg = pages->sgl, 933 .dma = sg_dma_address(iter.sg), 934 .max = iter.dma + iter.sg->length, 935 }; 936 struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; 937 struct gen8_insert_pte idx = gen8_insert_pte(start); 938 939 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, 940 &idx, cache_level)) 941 GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); 942 } 943 944 static void gen8_free_page_tables(struct i915_address_space *vm, 945 struct i915_page_directory *pd) 946 { 947 int i; 948 949 if (!px_page(pd)) 950 return; 951 952 for (i = 0; i < I915_PDES; i++) { 953 if (pd->page_table[i] != vm->scratch_pt) 954 free_pt(vm, pd->page_table[i]); 955 } 956 } 957 958 static int gen8_init_scratch(struct i915_address_space *vm) 959 { 960 int ret; 961 962 ret = setup_scratch_page(vm, I915_GFP_DMA); 963 if (ret) 964 return ret; 965 966 vm->scratch_pt = alloc_pt(vm); 967 if (IS_ERR(vm->scratch_pt)) { 968 ret = PTR_ERR(vm->scratch_pt); 969 goto free_scratch_page; 970 } 971 972 vm->scratch_pd = alloc_pd(vm); 973 if (IS_ERR(vm->scratch_pd)) { 974 ret = PTR_ERR(vm->scratch_pd); 975 goto free_pt; 976 } 977 978 if (use_4lvl(vm)) { 979 vm->scratch_pdp = alloc_pdp(vm); 980 if (IS_ERR(vm->scratch_pdp)) { 981 ret = PTR_ERR(vm->scratch_pdp); 982 goto free_pd; 983 } 984 } 985 986 gen8_initialize_pt(vm, vm->scratch_pt); 987 gen8_initialize_pd(vm, vm->scratch_pd); 988 if (use_4lvl(vm)) 989 gen8_initialize_pdp(vm, vm->scratch_pdp); 990 991 return 0; 992 993 free_pd: 994 free_pd(vm, vm->scratch_pd); 995 free_pt: 996 free_pt(vm, vm->scratch_pt); 997 free_scratch_page: 998 cleanup_scratch_page(vm); 999 1000 return ret; 1001 } 1002 1003 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 1004 { 1005 struct i915_address_space *vm = &ppgtt->base; 1006 struct drm_i915_private *dev_priv = vm->i915; 1007 enum vgt_g2v_type msg; 1008 int i; 1009 1010 if (use_4lvl(vm)) { 1011 const u64 daddr = px_dma(&ppgtt->pml4); 1012 1013 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1014 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1015 1016 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1017 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1018 } else { 1019 for (i = 0; i < GEN8_3LVL_PDPES; i++) { 1020 const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1021 1022 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1023 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1024 } 1025 1026 msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1027 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1028 } 1029 1030 I915_WRITE(vgtif_reg(g2v_notify), msg); 1031 1032 return 0; 1033 } 1034 1035 static void gen8_free_scratch(struct i915_address_space *vm) 1036 { 1037 if (use_4lvl(vm)) 1038 free_pdp(vm, vm->scratch_pdp); 1039 free_pd(vm, vm->scratch_pd); 1040 free_pt(vm, vm->scratch_pt); 1041 cleanup_scratch_page(vm); 1042 } 1043 1044 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, 1045 struct i915_page_directory_pointer *pdp) 1046 { 1047 const unsigned int pdpes = i915_pdpes_per_pdp(vm); 1048 int i; 1049 1050 for (i = 0; i < pdpes; i++) { 1051 if (pdp->page_directory[i] == vm->scratch_pd) 1052 continue; 1053 1054 gen8_free_page_tables(vm, pdp->page_directory[i]); 1055 free_pd(vm, pdp->page_directory[i]); 1056 } 1057 1058 free_pdp(vm, pdp); 1059 } 1060 1061 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1062 { 1063 int i; 1064 1065 for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { 1066 if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) 1067 continue; 1068 1069 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); 1070 } 1071 1072 cleanup_px(&ppgtt->base, &ppgtt->pml4); 1073 } 1074 1075 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1076 { 1077 struct drm_i915_private *dev_priv = vm->i915; 1078 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1079 1080 if (intel_vgpu_active(dev_priv)) 1081 gen8_ppgtt_notify_vgt(ppgtt, false); 1082 1083 if (use_4lvl(vm)) 1084 gen8_ppgtt_cleanup_4lvl(ppgtt); 1085 else 1086 gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); 1087 1088 gen8_free_scratch(vm); 1089 } 1090 1091 static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, 1092 struct i915_page_directory *pd, 1093 u64 start, u64 length) 1094 { 1095 struct i915_page_table *pt; 1096 u64 from = start; 1097 unsigned int pde; 1098 1099 gen8_for_each_pde(pt, pd, start, length, pde) { 1100 if (pt == vm->scratch_pt) { 1101 pt = alloc_pt(vm); 1102 if (IS_ERR(pt)) 1103 goto unwind; 1104 1105 gen8_initialize_pt(vm, pt); 1106 1107 gen8_ppgtt_set_pde(vm, pd, pt, pde); 1108 pd->used_pdes++; 1109 GEM_BUG_ON(pd->used_pdes > I915_PDES); 1110 } 1111 1112 pt->used_ptes += gen8_pte_count(start, length); 1113 } 1114 return 0; 1115 1116 unwind: 1117 gen8_ppgtt_clear_pd(vm, pd, from, start - from); 1118 return -ENOMEM; 1119 } 1120 1121 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, 1122 struct i915_page_directory_pointer *pdp, 1123 u64 start, u64 length) 1124 { 1125 struct i915_page_directory *pd; 1126 u64 from = start; 1127 unsigned int pdpe; 1128 int ret; 1129 1130 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1131 if (pd == vm->scratch_pd) { 1132 pd = alloc_pd(vm); 1133 if (IS_ERR(pd)) 1134 goto unwind; 1135 1136 gen8_initialize_pd(vm, pd); 1137 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); 1138 pdp->used_pdpes++; 1139 GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); 1140 1141 mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); 1142 } 1143 1144 ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); 1145 if (unlikely(ret)) 1146 goto unwind_pd; 1147 } 1148 1149 return 0; 1150 1151 unwind_pd: 1152 if (!pd->used_pdes) { 1153 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); 1154 GEM_BUG_ON(!pdp->used_pdpes); 1155 pdp->used_pdpes--; 1156 free_pd(vm, pd); 1157 } 1158 unwind: 1159 gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); 1160 return -ENOMEM; 1161 } 1162 1163 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, 1164 u64 start, u64 length) 1165 { 1166 return gen8_ppgtt_alloc_pdp(vm, 1167 &i915_vm_to_ppgtt(vm)->pdp, start, length); 1168 } 1169 1170 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, 1171 u64 start, u64 length) 1172 { 1173 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1174 struct i915_pml4 *pml4 = &ppgtt->pml4; 1175 struct i915_page_directory_pointer *pdp; 1176 u64 from = start; 1177 u32 pml4e; 1178 int ret; 1179 1180 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1181 if (pml4->pdps[pml4e] == vm->scratch_pdp) { 1182 pdp = alloc_pdp(vm); 1183 if (IS_ERR(pdp)) 1184 goto unwind; 1185 1186 gen8_initialize_pdp(vm, pdp); 1187 gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); 1188 } 1189 1190 ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); 1191 if (unlikely(ret)) 1192 goto unwind_pdp; 1193 } 1194 1195 return 0; 1196 1197 unwind_pdp: 1198 if (!pdp->used_pdpes) { 1199 gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); 1200 free_pdp(vm, pdp); 1201 } 1202 unwind: 1203 gen8_ppgtt_clear_4lvl(vm, from, start - from); 1204 return -ENOMEM; 1205 } 1206 1207 static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, 1208 struct i915_page_directory_pointer *pdp, 1209 u64 start, u64 length, 1210 gen8_pte_t scratch_pte, 1211 struct seq_file *m) 1212 { 1213 struct i915_address_space *vm = &ppgtt->base; 1214 struct i915_page_directory *pd; 1215 u32 pdpe; 1216 1217 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1218 struct i915_page_table *pt; 1219 u64 pd_len = length; 1220 u64 pd_start = start; 1221 u32 pde; 1222 1223 if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) 1224 continue; 1225 1226 seq_printf(m, "\tPDPE #%d\n", pdpe); 1227 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1228 u32 pte; 1229 gen8_pte_t *pt_vaddr; 1230 1231 if (pd->page_table[pde] == ppgtt->base.scratch_pt) 1232 continue; 1233 1234 pt_vaddr = kmap_atomic_px(pt); 1235 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1236 u64 va = (pdpe << GEN8_PDPE_SHIFT | 1237 pde << GEN8_PDE_SHIFT | 1238 pte << GEN8_PTE_SHIFT); 1239 int i; 1240 bool found = false; 1241 1242 for (i = 0; i < 4; i++) 1243 if (pt_vaddr[pte + i] != scratch_pte) 1244 found = true; 1245 if (!found) 1246 continue; 1247 1248 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1249 for (i = 0; i < 4; i++) { 1250 if (pt_vaddr[pte + i] != scratch_pte) 1251 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1252 else 1253 seq_puts(m, " SCRATCH "); 1254 } 1255 seq_puts(m, "\n"); 1256 } 1257 kunmap_atomic(pt_vaddr); 1258 } 1259 } 1260 } 1261 1262 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1263 { 1264 struct i915_address_space *vm = &ppgtt->base; 1265 const gen8_pte_t scratch_pte = 1266 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); 1267 u64 start = 0, length = ppgtt->base.total; 1268 1269 if (use_4lvl(vm)) { 1270 u64 pml4e; 1271 struct i915_pml4 *pml4 = &ppgtt->pml4; 1272 struct i915_page_directory_pointer *pdp; 1273 1274 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1275 if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) 1276 continue; 1277 1278 seq_printf(m, " PML4E #%llu\n", pml4e); 1279 gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); 1280 } 1281 } else { 1282 gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); 1283 } 1284 } 1285 1286 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) 1287 { 1288 struct i915_address_space *vm = &ppgtt->base; 1289 struct i915_page_directory_pointer *pdp = &ppgtt->pdp; 1290 struct i915_page_directory *pd; 1291 u64 start = 0, length = ppgtt->base.total; 1292 u64 from = start; 1293 unsigned int pdpe; 1294 1295 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1296 pd = alloc_pd(vm); 1297 if (IS_ERR(pd)) 1298 goto unwind; 1299 1300 gen8_initialize_pd(vm, pd); 1301 gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); 1302 pdp->used_pdpes++; 1303 } 1304 1305 pdp->used_pdpes++; /* never remove */ 1306 return 0; 1307 1308 unwind: 1309 start -= from; 1310 gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { 1311 gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); 1312 free_pd(vm, pd); 1313 } 1314 pdp->used_pdpes = 0; 1315 return -ENOMEM; 1316 } 1317 1318 /* 1319 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 1320 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1321 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 1322 * space. 1323 * 1324 */ 1325 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1326 { 1327 struct i915_address_space *vm = &ppgtt->base; 1328 struct drm_i915_private *dev_priv = vm->i915; 1329 int ret; 1330 1331 ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? 1332 1ULL << 48 : 1333 1ULL << 32; 1334 1335 ret = gen8_init_scratch(&ppgtt->base); 1336 if (ret) { 1337 ppgtt->base.total = 0; 1338 return ret; 1339 } 1340 1341 /* There are only few exceptions for gen >=6. chv and bxt. 1342 * And we are not sure about the latter so play safe for now. 1343 */ 1344 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 1345 ppgtt->base.pt_kmap_wc = true; 1346 1347 if (use_4lvl(vm)) { 1348 ret = setup_px(&ppgtt->base, &ppgtt->pml4); 1349 if (ret) 1350 goto free_scratch; 1351 1352 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1353 1354 ppgtt->switch_mm = gen8_mm_switch_4lvl; 1355 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; 1356 ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; 1357 ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; 1358 } else { 1359 ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); 1360 if (ret) 1361 goto free_scratch; 1362 1363 if (intel_vgpu_active(dev_priv)) { 1364 ret = gen8_preallocate_top_level_pdp(ppgtt); 1365 if (ret) { 1366 __pdp_fini(&ppgtt->pdp); 1367 goto free_scratch; 1368 } 1369 } 1370 1371 ppgtt->switch_mm = gen8_mm_switch_3lvl; 1372 ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; 1373 ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; 1374 ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; 1375 } 1376 1377 if (intel_vgpu_active(dev_priv)) 1378 gen8_ppgtt_notify_vgt(ppgtt, true); 1379 1380 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1381 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1382 ppgtt->base.bind_vma = ppgtt_bind_vma; 1383 ppgtt->debug_dump = gen8_dump_ppgtt; 1384 1385 return 0; 1386 1387 free_scratch: 1388 gen8_free_scratch(&ppgtt->base); 1389 return ret; 1390 } 1391 1392 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1393 { 1394 struct i915_address_space *vm = &ppgtt->base; 1395 struct i915_page_table *unused; 1396 gen6_pte_t scratch_pte; 1397 u32 pd_entry, pte, pde; 1398 u32 start = 0, length = ppgtt->base.total; 1399 1400 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1401 I915_CACHE_LLC, 0); 1402 1403 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1404 u32 expected; 1405 gen6_pte_t *pt_vaddr; 1406 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1407 pd_entry = readl(ppgtt->pd_addr + pde); 1408 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1409 1410 if (pd_entry != expected) 1411 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1412 pde, 1413 pd_entry, 1414 expected); 1415 seq_printf(m, "\tPDE: %x\n", pd_entry); 1416 1417 pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); 1418 1419 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1420 unsigned long va = 1421 (pde * PAGE_SIZE * GEN6_PTES) + 1422 (pte * PAGE_SIZE); 1423 int i; 1424 bool found = false; 1425 for (i = 0; i < 4; i++) 1426 if (pt_vaddr[pte + i] != scratch_pte) 1427 found = true; 1428 if (!found) 1429 continue; 1430 1431 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1432 for (i = 0; i < 4; i++) { 1433 if (pt_vaddr[pte + i] != scratch_pte) 1434 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1435 else 1436 seq_puts(m, " SCRATCH "); 1437 } 1438 seq_puts(m, "\n"); 1439 } 1440 kunmap_atomic(pt_vaddr); 1441 } 1442 } 1443 1444 /* Write pde (index) from the page directory @pd to the page table @pt */ 1445 static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, 1446 const unsigned int pde, 1447 const struct i915_page_table *pt) 1448 { 1449 /* Caller needs to make sure the write completes if necessary */ 1450 writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, 1451 ppgtt->pd_addr + pde); 1452 } 1453 1454 /* Write all the page tables found in the ppgtt structure to incrementing page 1455 * directories. */ 1456 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, 1457 u32 start, u32 length) 1458 { 1459 struct i915_page_table *pt; 1460 unsigned int pde; 1461 1462 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) 1463 gen6_write_pde(ppgtt, pde, pt); 1464 1465 mark_tlbs_dirty(ppgtt); 1466 wmb(); 1467 } 1468 1469 static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1470 { 1471 GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1472 return ppgtt->pd.base.ggtt_offset << 10; 1473 } 1474 1475 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1476 struct drm_i915_gem_request *req) 1477 { 1478 struct intel_engine_cs *engine = req->engine; 1479 u32 *cs; 1480 1481 /* NB: TLBs must be flushed and invalidated before a switch */ 1482 cs = intel_ring_begin(req, 6); 1483 if (IS_ERR(cs)) 1484 return PTR_ERR(cs); 1485 1486 *cs++ = MI_LOAD_REGISTER_IMM(2); 1487 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); 1488 *cs++ = PP_DIR_DCLV_2G; 1489 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1490 *cs++ = get_pd_offset(ppgtt); 1491 *cs++ = MI_NOOP; 1492 intel_ring_advance(req, cs); 1493 1494 return 0; 1495 } 1496 1497 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1498 struct drm_i915_gem_request *req) 1499 { 1500 struct intel_engine_cs *engine = req->engine; 1501 u32 *cs; 1502 1503 /* NB: TLBs must be flushed and invalidated before a switch */ 1504 cs = intel_ring_begin(req, 6); 1505 if (IS_ERR(cs)) 1506 return PTR_ERR(cs); 1507 1508 *cs++ = MI_LOAD_REGISTER_IMM(2); 1509 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); 1510 *cs++ = PP_DIR_DCLV_2G; 1511 *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); 1512 *cs++ = get_pd_offset(ppgtt); 1513 *cs++ = MI_NOOP; 1514 intel_ring_advance(req, cs); 1515 1516 return 0; 1517 } 1518 1519 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1520 struct drm_i915_gem_request *req) 1521 { 1522 struct intel_engine_cs *engine = req->engine; 1523 struct drm_i915_private *dev_priv = req->i915; 1524 1525 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1526 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1527 return 0; 1528 } 1529 1530 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) 1531 { 1532 struct intel_engine_cs *engine; 1533 enum intel_engine_id id; 1534 1535 for_each_engine(engine, dev_priv, id) { 1536 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ? 1537 GEN8_GFX_PPGTT_48B : 0; 1538 I915_WRITE(RING_MODE_GEN7(engine), 1539 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1540 } 1541 } 1542 1543 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) 1544 { 1545 struct intel_engine_cs *engine; 1546 u32 ecochk, ecobits; 1547 enum intel_engine_id id; 1548 1549 ecobits = I915_READ(GAC_ECO_BITS); 1550 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1551 1552 ecochk = I915_READ(GAM_ECOCHK); 1553 if (IS_HASWELL(dev_priv)) { 1554 ecochk |= ECOCHK_PPGTT_WB_HSW; 1555 } else { 1556 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1557 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1558 } 1559 I915_WRITE(GAM_ECOCHK, ecochk); 1560 1561 for_each_engine(engine, dev_priv, id) { 1562 /* GFX_MODE is per-ring on gen7+ */ 1563 I915_WRITE(RING_MODE_GEN7(engine), 1564 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1565 } 1566 } 1567 1568 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) 1569 { 1570 u32 ecochk, gab_ctl, ecobits; 1571 1572 ecobits = I915_READ(GAC_ECO_BITS); 1573 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1574 ECOBITS_PPGTT_CACHE64B); 1575 1576 gab_ctl = I915_READ(GAB_CTL); 1577 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1578 1579 ecochk = I915_READ(GAM_ECOCHK); 1580 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1581 1582 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1583 } 1584 1585 /* PPGTT support for Sandybdrige/Gen6 and later */ 1586 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1587 u64 start, u64 length) 1588 { 1589 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1590 unsigned int first_entry = start >> PAGE_SHIFT; 1591 unsigned int pde = first_entry / GEN6_PTES; 1592 unsigned int pte = first_entry % GEN6_PTES; 1593 unsigned int num_entries = length >> PAGE_SHIFT; 1594 gen6_pte_t scratch_pte = 1595 vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); 1596 1597 while (num_entries) { 1598 struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; 1599 unsigned int end = min(pte + num_entries, GEN6_PTES); 1600 gen6_pte_t *vaddr; 1601 1602 num_entries -= end - pte; 1603 1604 /* Note that the hw doesn't support removing PDE on the fly 1605 * (they are cached inside the context with no means to 1606 * invalidate the cache), so we can only reset the PTE 1607 * entries back to scratch. 1608 */ 1609 1610 vaddr = kmap_atomic_px(pt); 1611 do { 1612 vaddr[pte++] = scratch_pte; 1613 } while (pte < end); 1614 kunmap_atomic(vaddr); 1615 1616 pte = 0; 1617 } 1618 } 1619 1620 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1621 struct sg_table *pages, 1622 u64 start, 1623 enum i915_cache_level cache_level, 1624 u32 flags) 1625 { 1626 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1627 unsigned first_entry = start >> PAGE_SHIFT; 1628 unsigned act_pt = first_entry / GEN6_PTES; 1629 unsigned act_pte = first_entry % GEN6_PTES; 1630 const u32 pte_encode = vm->pte_encode(0, cache_level, flags); 1631 struct sgt_dma iter; 1632 gen6_pte_t *vaddr; 1633 1634 vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); 1635 iter.sg = pages->sgl; 1636 iter.dma = sg_dma_address(iter.sg); 1637 iter.max = iter.dma + iter.sg->length; 1638 do { 1639 vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); 1640 1641 iter.dma += PAGE_SIZE; 1642 if (iter.dma == iter.max) { 1643 iter.sg = __sg_next(iter.sg); 1644 if (!iter.sg) 1645 break; 1646 1647 iter.dma = sg_dma_address(iter.sg); 1648 iter.max = iter.dma + iter.sg->length; 1649 } 1650 1651 if (++act_pte == GEN6_PTES) { 1652 kunmap_atomic(vaddr); 1653 vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); 1654 act_pte = 0; 1655 } 1656 } while (1); 1657 kunmap_atomic(vaddr); 1658 } 1659 1660 static int gen6_alloc_va_range(struct i915_address_space *vm, 1661 u64 start, u64 length) 1662 { 1663 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1664 struct i915_page_table *pt; 1665 u64 from = start; 1666 unsigned int pde; 1667 bool flush = false; 1668 1669 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1670 if (pt == vm->scratch_pt) { 1671 pt = alloc_pt(vm); 1672 if (IS_ERR(pt)) 1673 goto unwind_out; 1674 1675 gen6_initialize_pt(vm, pt); 1676 ppgtt->pd.page_table[pde] = pt; 1677 gen6_write_pde(ppgtt, pde, pt); 1678 flush = true; 1679 } 1680 } 1681 1682 if (flush) { 1683 mark_tlbs_dirty(ppgtt); 1684 wmb(); 1685 } 1686 1687 return 0; 1688 1689 unwind_out: 1690 gen6_ppgtt_clear_range(vm, from, start); 1691 return -ENOMEM; 1692 } 1693 1694 static int gen6_init_scratch(struct i915_address_space *vm) 1695 { 1696 int ret; 1697 1698 ret = setup_scratch_page(vm, I915_GFP_DMA); 1699 if (ret) 1700 return ret; 1701 1702 vm->scratch_pt = alloc_pt(vm); 1703 if (IS_ERR(vm->scratch_pt)) { 1704 cleanup_scratch_page(vm); 1705 return PTR_ERR(vm->scratch_pt); 1706 } 1707 1708 gen6_initialize_pt(vm, vm->scratch_pt); 1709 1710 return 0; 1711 } 1712 1713 static void gen6_free_scratch(struct i915_address_space *vm) 1714 { 1715 free_pt(vm, vm->scratch_pt); 1716 cleanup_scratch_page(vm); 1717 } 1718 1719 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1720 { 1721 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1722 struct i915_page_directory *pd = &ppgtt->pd; 1723 struct i915_page_table *pt; 1724 u32 pde; 1725 1726 drm_mm_remove_node(&ppgtt->node); 1727 1728 gen6_for_all_pdes(pt, pd, pde) 1729 if (pt != vm->scratch_pt) 1730 free_pt(vm, pt); 1731 1732 gen6_free_scratch(vm); 1733 } 1734 1735 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1736 { 1737 struct i915_address_space *vm = &ppgtt->base; 1738 struct drm_i915_private *dev_priv = ppgtt->base.i915; 1739 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1740 int ret; 1741 1742 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1743 * allocator works in address space sizes, so it's multiplied by page 1744 * size. We allocate at the top of the GTT to avoid fragmentation. 1745 */ 1746 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 1747 1748 ret = gen6_init_scratch(vm); 1749 if (ret) 1750 return ret; 1751 1752 ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node, 1753 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1754 I915_COLOR_UNEVICTABLE, 1755 0, ggtt->base.total, 1756 PIN_HIGH); 1757 if (ret) 1758 goto err_out; 1759 1760 if (ppgtt->node.start < ggtt->mappable_end) 1761 DRM_DEBUG("Forced to use aperture for PDEs\n"); 1762 1763 ppgtt->pd.base.ggtt_offset = 1764 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 1765 1766 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 1767 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 1768 1769 return 0; 1770 1771 err_out: 1772 gen6_free_scratch(vm); 1773 return ret; 1774 } 1775 1776 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 1777 { 1778 return gen6_ppgtt_allocate_page_directories(ppgtt); 1779 } 1780 1781 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 1782 u64 start, u64 length) 1783 { 1784 struct i915_page_table *unused; 1785 u32 pde; 1786 1787 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 1788 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 1789 } 1790 1791 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1792 { 1793 struct drm_i915_private *dev_priv = ppgtt->base.i915; 1794 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1795 int ret; 1796 1797 ppgtt->base.pte_encode = ggtt->base.pte_encode; 1798 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 1799 ppgtt->switch_mm = gen6_mm_switch; 1800 else if (IS_HASWELL(dev_priv)) 1801 ppgtt->switch_mm = hsw_mm_switch; 1802 else if (IS_GEN7(dev_priv)) 1803 ppgtt->switch_mm = gen7_mm_switch; 1804 else 1805 BUG(); 1806 1807 ret = gen6_ppgtt_alloc(ppgtt); 1808 if (ret) 1809 return ret; 1810 1811 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 1812 1813 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 1814 gen6_write_page_range(ppgtt, 0, ppgtt->base.total); 1815 1816 ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); 1817 if (ret) { 1818 gen6_ppgtt_cleanup(&ppgtt->base); 1819 return ret; 1820 } 1821 1822 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 1823 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 1824 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1825 ppgtt->base.bind_vma = ppgtt_bind_vma; 1826 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 1827 ppgtt->debug_dump = gen6_dump_ppgtt; 1828 1829 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 1830 ppgtt->node.size >> 20, 1831 ppgtt->node.start / PAGE_SIZE); 1832 1833 DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", 1834 ppgtt->pd.base.ggtt_offset << 10); 1835 1836 return 0; 1837 } 1838 1839 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 1840 struct drm_i915_private *dev_priv) 1841 { 1842 ppgtt->base.i915 = dev_priv; 1843 ppgtt->base.dma = &dev_priv->drm.pdev->dev; 1844 1845 if (INTEL_INFO(dev_priv)->gen < 8) 1846 return gen6_ppgtt_init(ppgtt); 1847 else 1848 return gen8_ppgtt_init(ppgtt); 1849 } 1850 1851 static void i915_address_space_init(struct i915_address_space *vm, 1852 struct drm_i915_private *dev_priv, 1853 const char *name) 1854 { 1855 i915_gem_timeline_init(dev_priv, &vm->timeline, name); 1856 1857 drm_mm_init(&vm->mm, 0, vm->total); 1858 vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; 1859 1860 INIT_LIST_HEAD(&vm->active_list); 1861 INIT_LIST_HEAD(&vm->inactive_list); 1862 INIT_LIST_HEAD(&vm->unbound_list); 1863 1864 list_add_tail(&vm->global_link, &dev_priv->vm_list); 1865 pagevec_init(&vm->free_pages, false); 1866 } 1867 1868 static void i915_address_space_fini(struct i915_address_space *vm) 1869 { 1870 if (pagevec_count(&vm->free_pages)) 1871 vm_free_pages_release(vm); 1872 1873 i915_gem_timeline_fini(&vm->timeline); 1874 drm_mm_takedown(&vm->mm); 1875 list_del(&vm->global_link); 1876 } 1877 1878 static void gtt_write_workarounds(struct drm_i915_private *dev_priv) 1879 { 1880 /* This function is for gtt related workarounds. This function is 1881 * called on driver load and after a GPU reset, so you can place 1882 * workarounds here even if they get overwritten by GPU reset. 1883 */ 1884 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ 1885 if (IS_BROADWELL(dev_priv)) 1886 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 1887 else if (IS_CHERRYVIEW(dev_priv)) 1888 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 1889 else if (IS_GEN9_BC(dev_priv)) 1890 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 1891 else if (IS_GEN9_LP(dev_priv)) 1892 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 1893 } 1894 1895 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) 1896 { 1897 gtt_write_workarounds(dev_priv); 1898 1899 /* In the case of execlists, PPGTT is enabled by the context descriptor 1900 * and the PDPs are contained within the context itself. We don't 1901 * need to do anything here. */ 1902 if (i915.enable_execlists) 1903 return 0; 1904 1905 if (!USES_PPGTT(dev_priv)) 1906 return 0; 1907 1908 if (IS_GEN6(dev_priv)) 1909 gen6_ppgtt_enable(dev_priv); 1910 else if (IS_GEN7(dev_priv)) 1911 gen7_ppgtt_enable(dev_priv); 1912 else if (INTEL_GEN(dev_priv) >= 8) 1913 gen8_ppgtt_enable(dev_priv); 1914 else 1915 MISSING_CASE(INTEL_GEN(dev_priv)); 1916 1917 return 0; 1918 } 1919 1920 struct i915_hw_ppgtt * 1921 i915_ppgtt_create(struct drm_i915_private *dev_priv, 1922 struct drm_i915_file_private *fpriv, 1923 const char *name) 1924 { 1925 struct i915_hw_ppgtt *ppgtt; 1926 int ret; 1927 1928 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 1929 if (!ppgtt) 1930 return ERR_PTR(-ENOMEM); 1931 1932 ret = __hw_ppgtt_init(ppgtt, dev_priv); 1933 if (ret) { 1934 kfree(ppgtt); 1935 return ERR_PTR(ret); 1936 } 1937 1938 kref_init(&ppgtt->ref); 1939 i915_address_space_init(&ppgtt->base, dev_priv, name); 1940 ppgtt->base.file = fpriv; 1941 1942 trace_i915_ppgtt_create(&ppgtt->base); 1943 1944 return ppgtt; 1945 } 1946 1947 void i915_ppgtt_close(struct i915_address_space *vm) 1948 { 1949 struct list_head *phases[] = { 1950 &vm->active_list, 1951 &vm->inactive_list, 1952 &vm->unbound_list, 1953 NULL, 1954 }, **phase; 1955 1956 GEM_BUG_ON(vm->closed); 1957 vm->closed = true; 1958 1959 for (phase = phases; *phase; phase++) { 1960 struct i915_vma *vma, *vn; 1961 1962 list_for_each_entry_safe(vma, vn, *phase, vm_link) 1963 if (!i915_vma_is_closed(vma)) 1964 i915_vma_close(vma); 1965 } 1966 } 1967 1968 void i915_ppgtt_release(struct kref *kref) 1969 { 1970 struct i915_hw_ppgtt *ppgtt = 1971 container_of(kref, struct i915_hw_ppgtt, ref); 1972 1973 trace_i915_ppgtt_release(&ppgtt->base); 1974 1975 /* vmas should already be unbound and destroyed */ 1976 WARN_ON(!list_empty(&ppgtt->base.active_list)); 1977 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 1978 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 1979 1980 ppgtt->base.cleanup(&ppgtt->base); 1981 i915_address_space_fini(&ppgtt->base); 1982 kfree(ppgtt); 1983 } 1984 1985 /* Certain Gen5 chipsets require require idling the GPU before 1986 * unmapping anything from the GTT when VT-d is enabled. 1987 */ 1988 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 1989 { 1990 #ifdef CONFIG_INTEL_IOMMU 1991 /* Query intel_iommu to see if we need the workaround. Presumably that 1992 * was loaded first. 1993 */ 1994 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 1995 return true; 1996 #endif 1997 return false; 1998 } 1999 2000 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2001 { 2002 struct intel_engine_cs *engine; 2003 enum intel_engine_id id; 2004 2005 if (INTEL_INFO(dev_priv)->gen < 6) 2006 return; 2007 2008 for_each_engine(engine, dev_priv, id) { 2009 u32 fault_reg; 2010 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2011 if (fault_reg & RING_FAULT_VALID) { 2012 DRM_DEBUG_DRIVER("Unexpected fault\n" 2013 "\tAddr: 0x%08lx\n" 2014 "\tAddress space: %s\n" 2015 "\tSource ID: %d\n" 2016 "\tType: %d\n", 2017 fault_reg & PAGE_MASK, 2018 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2019 RING_FAULT_SRCID(fault_reg), 2020 RING_FAULT_FAULT_TYPE(fault_reg)); 2021 I915_WRITE(RING_FAULT_REG(engine), 2022 fault_reg & ~RING_FAULT_VALID); 2023 } 2024 } 2025 2026 /* Engine specific init may not have been done till this point. */ 2027 if (dev_priv->engine[RCS]) 2028 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2029 } 2030 2031 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) 2032 { 2033 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2034 2035 /* Don't bother messing with faults pre GEN6 as we have little 2036 * documentation supporting that it's a good idea. 2037 */ 2038 if (INTEL_GEN(dev_priv) < 6) 2039 return; 2040 2041 i915_check_and_clear_faults(dev_priv); 2042 2043 ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); 2044 2045 i915_ggtt_invalidate(dev_priv); 2046 } 2047 2048 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2049 struct sg_table *pages) 2050 { 2051 do { 2052 if (dma_map_sg(&obj->base.dev->pdev->dev, 2053 pages->sgl, pages->nents, 2054 PCI_DMA_BIDIRECTIONAL)) 2055 return 0; 2056 2057 /* If the DMA remap fails, one cause can be that we have 2058 * too many objects pinned in a small remapping table, 2059 * such as swiotlb. Incrementally purge all other objects and 2060 * try again - if there are no more pages to remove from 2061 * the DMA remapper, i915_gem_shrink will return 0. 2062 */ 2063 GEM_BUG_ON(obj->mm.pages == pages); 2064 } while (i915_gem_shrink(to_i915(obj->base.dev), 2065 obj->base.size >> PAGE_SHIFT, 2066 I915_SHRINK_BOUND | 2067 I915_SHRINK_UNBOUND | 2068 I915_SHRINK_ACTIVE)); 2069 2070 return -ENOSPC; 2071 } 2072 2073 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2074 { 2075 writeq(pte, addr); 2076 } 2077 2078 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2079 dma_addr_t addr, 2080 u64 offset, 2081 enum i915_cache_level level, 2082 u32 unused) 2083 { 2084 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2085 gen8_pte_t __iomem *pte = 2086 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); 2087 2088 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2089 2090 ggtt->invalidate(vm->i915); 2091 } 2092 2093 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2094 struct sg_table *st, 2095 u64 start, 2096 enum i915_cache_level level, 2097 u32 unused) 2098 { 2099 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2100 struct sgt_iter sgt_iter; 2101 gen8_pte_t __iomem *gtt_entries; 2102 const gen8_pte_t pte_encode = gen8_pte_encode(0, level); 2103 dma_addr_t addr; 2104 2105 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; 2106 gtt_entries += start >> PAGE_SHIFT; 2107 for_each_sgt_dma(addr, sgt_iter, st) 2108 gen8_set_pte(gtt_entries++, pte_encode | addr); 2109 2110 wmb(); 2111 2112 /* This next bit makes the above posting read even more important. We 2113 * want to flush the TLBs only after we're certain all the PTE updates 2114 * have finished. 2115 */ 2116 ggtt->invalidate(vm->i915); 2117 } 2118 2119 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2120 dma_addr_t addr, 2121 u64 offset, 2122 enum i915_cache_level level, 2123 u32 flags) 2124 { 2125 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2126 gen6_pte_t __iomem *pte = 2127 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); 2128 2129 iowrite32(vm->pte_encode(addr, level, flags), pte); 2130 2131 ggtt->invalidate(vm->i915); 2132 } 2133 2134 /* 2135 * Binds an object into the global gtt with the specified cache level. The object 2136 * will be accessible to the GPU via commands whose operands reference offsets 2137 * within the global GTT as well as accessible by the GPU through the GMADR 2138 * mapped BAR (dev_priv->mm.gtt->gtt). 2139 */ 2140 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2141 struct sg_table *st, 2142 u64 start, 2143 enum i915_cache_level level, 2144 u32 flags) 2145 { 2146 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2147 gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; 2148 unsigned int i = start >> PAGE_SHIFT; 2149 struct sgt_iter iter; 2150 dma_addr_t addr; 2151 for_each_sgt_dma(addr, iter, st) 2152 iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); 2153 wmb(); 2154 2155 /* This next bit makes the above posting read even more important. We 2156 * want to flush the TLBs only after we're certain all the PTE updates 2157 * have finished. 2158 */ 2159 ggtt->invalidate(vm->i915); 2160 } 2161 2162 static void nop_clear_range(struct i915_address_space *vm, 2163 u64 start, u64 length) 2164 { 2165 } 2166 2167 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2168 u64 start, u64 length) 2169 { 2170 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2171 unsigned first_entry = start >> PAGE_SHIFT; 2172 unsigned num_entries = length >> PAGE_SHIFT; 2173 const gen8_pte_t scratch_pte = 2174 gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); 2175 gen8_pte_t __iomem *gtt_base = 2176 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2177 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2178 int i; 2179 2180 if (WARN(num_entries > max_entries, 2181 "First entry = %d; Num entries = %d (max=%d)\n", 2182 first_entry, num_entries, max_entries)) 2183 num_entries = max_entries; 2184 2185 for (i = 0; i < num_entries; i++) 2186 gen8_set_pte(>t_base[i], scratch_pte); 2187 } 2188 2189 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2190 u64 start, u64 length) 2191 { 2192 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2193 unsigned first_entry = start >> PAGE_SHIFT; 2194 unsigned num_entries = length >> PAGE_SHIFT; 2195 gen6_pte_t scratch_pte, __iomem *gtt_base = 2196 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2197 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2198 int i; 2199 2200 if (WARN(num_entries > max_entries, 2201 "First entry = %d; Num entries = %d (max=%d)\n", 2202 first_entry, num_entries, max_entries)) 2203 num_entries = max_entries; 2204 2205 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2206 I915_CACHE_LLC, 0); 2207 2208 for (i = 0; i < num_entries; i++) 2209 iowrite32(scratch_pte, >t_base[i]); 2210 } 2211 2212 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2213 dma_addr_t addr, 2214 u64 offset, 2215 enum i915_cache_level cache_level, 2216 u32 unused) 2217 { 2218 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2219 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2220 2221 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2222 } 2223 2224 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2225 struct sg_table *pages, 2226 u64 start, 2227 enum i915_cache_level cache_level, 2228 u32 unused) 2229 { 2230 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2231 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2232 2233 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2234 } 2235 2236 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2237 u64 start, u64 length) 2238 { 2239 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2240 } 2241 2242 static int ggtt_bind_vma(struct i915_vma *vma, 2243 enum i915_cache_level cache_level, 2244 u32 flags) 2245 { 2246 struct drm_i915_private *i915 = vma->vm->i915; 2247 struct drm_i915_gem_object *obj = vma->obj; 2248 u32 pte_flags; 2249 2250 if (unlikely(!vma->pages)) { 2251 int ret = i915_get_ggtt_vma_pages(vma); 2252 if (ret) 2253 return ret; 2254 } 2255 2256 /* Currently applicable only to VLV */ 2257 pte_flags = 0; 2258 if (obj->gt_ro) 2259 pte_flags |= PTE_READ_ONLY; 2260 2261 intel_runtime_pm_get(i915); 2262 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2263 cache_level, pte_flags); 2264 intel_runtime_pm_put(i915); 2265 2266 /* 2267 * Without aliasing PPGTT there's no difference between 2268 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2269 * upgrade to both bound if we bind either to avoid double-binding. 2270 */ 2271 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2272 2273 return 0; 2274 } 2275 2276 static void ggtt_unbind_vma(struct i915_vma *vma) 2277 { 2278 struct drm_i915_private *i915 = vma->vm->i915; 2279 2280 intel_runtime_pm_get(i915); 2281 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 2282 intel_runtime_pm_put(i915); 2283 } 2284 2285 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2286 enum i915_cache_level cache_level, 2287 u32 flags) 2288 { 2289 struct drm_i915_private *i915 = vma->vm->i915; 2290 u32 pte_flags; 2291 int ret; 2292 2293 if (unlikely(!vma->pages)) { 2294 ret = i915_get_ggtt_vma_pages(vma); 2295 if (ret) 2296 return ret; 2297 } 2298 2299 /* Currently applicable only to VLV */ 2300 pte_flags = 0; 2301 if (vma->obj->gt_ro) 2302 pte_flags |= PTE_READ_ONLY; 2303 2304 if (flags & I915_VMA_LOCAL_BIND) { 2305 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2306 2307 if (appgtt->base.allocate_va_range) { 2308 ret = appgtt->base.allocate_va_range(&appgtt->base, 2309 vma->node.start, 2310 vma->node.size); 2311 if (ret) 2312 goto err_pages; 2313 } 2314 2315 appgtt->base.insert_entries(&appgtt->base, 2316 vma->pages, vma->node.start, 2317 cache_level, pte_flags); 2318 } 2319 2320 if (flags & I915_VMA_GLOBAL_BIND) { 2321 intel_runtime_pm_get(i915); 2322 vma->vm->insert_entries(vma->vm, 2323 vma->pages, vma->node.start, 2324 cache_level, pte_flags); 2325 intel_runtime_pm_put(i915); 2326 } 2327 2328 return 0; 2329 2330 err_pages: 2331 if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { 2332 if (vma->pages != vma->obj->mm.pages) { 2333 GEM_BUG_ON(!vma->pages); 2334 sg_free_table(vma->pages); 2335 kfree(vma->pages); 2336 } 2337 vma->pages = NULL; 2338 } 2339 return ret; 2340 } 2341 2342 static void aliasing_gtt_unbind_vma(struct i915_vma *vma) 2343 { 2344 struct drm_i915_private *i915 = vma->vm->i915; 2345 2346 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2347 intel_runtime_pm_get(i915); 2348 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 2349 intel_runtime_pm_put(i915); 2350 } 2351 2352 if (vma->flags & I915_VMA_LOCAL_BIND) { 2353 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; 2354 2355 vm->clear_range(vm, vma->node.start, vma->size); 2356 } 2357 } 2358 2359 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2360 struct sg_table *pages) 2361 { 2362 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2363 struct device *kdev = &dev_priv->drm.pdev->dev; 2364 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2365 2366 if (unlikely(ggtt->do_idle_maps)) { 2367 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2368 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2369 /* Wait a bit, in hopes it avoids the hang */ 2370 udelay(10); 2371 } 2372 } 2373 2374 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2375 } 2376 2377 static void i915_gtt_color_adjust(const struct drm_mm_node *node, 2378 unsigned long color, 2379 u64 *start, 2380 u64 *end) 2381 { 2382 if (node->allocated && node->color != color) 2383 *start += I915_GTT_PAGE_SIZE; 2384 2385 /* Also leave a space between the unallocated reserved node after the 2386 * GTT and any objects within the GTT, i.e. we use the color adjustment 2387 * to insert a guard page to prevent prefetches crossing over the 2388 * GTT boundary. 2389 */ 2390 node = list_next_entry(node, node_list); 2391 if (node->color != color) 2392 *end -= I915_GTT_PAGE_SIZE; 2393 } 2394 2395 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) 2396 { 2397 struct i915_ggtt *ggtt = &i915->ggtt; 2398 struct i915_hw_ppgtt *ppgtt; 2399 int err; 2400 2401 ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); 2402 if (IS_ERR(ppgtt)) 2403 return PTR_ERR(ppgtt); 2404 2405 if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { 2406 err = -ENODEV; 2407 goto err_ppgtt; 2408 } 2409 2410 if (ppgtt->base.allocate_va_range) { 2411 /* Note we only pre-allocate as far as the end of the global 2412 * GTT. On 48b / 4-level page-tables, the difference is very, 2413 * very significant! We have to preallocate as GVT/vgpu does 2414 * not like the page directory disappearing. 2415 */ 2416 err = ppgtt->base.allocate_va_range(&ppgtt->base, 2417 0, ggtt->base.total); 2418 if (err) 2419 goto err_ppgtt; 2420 } 2421 2422 i915->mm.aliasing_ppgtt = ppgtt; 2423 2424 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2425 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2426 2427 WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); 2428 ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; 2429 2430 return 0; 2431 2432 err_ppgtt: 2433 i915_ppgtt_put(ppgtt); 2434 return err; 2435 } 2436 2437 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) 2438 { 2439 struct i915_ggtt *ggtt = &i915->ggtt; 2440 struct i915_hw_ppgtt *ppgtt; 2441 2442 ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); 2443 if (!ppgtt) 2444 return; 2445 2446 i915_ppgtt_put(ppgtt); 2447 2448 ggtt->base.bind_vma = ggtt_bind_vma; 2449 ggtt->base.unbind_vma = ggtt_unbind_vma; 2450 } 2451 2452 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2453 { 2454 /* Let GEM Manage all of the aperture. 2455 * 2456 * However, leave one page at the end still bound to the scratch page. 2457 * There are a number of places where the hardware apparently prefetches 2458 * past the end of the object, and we've seen multiple hangs with the 2459 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2460 * aperture. One page should be enough to keep any prefetching inside 2461 * of the aperture. 2462 */ 2463 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2464 unsigned long hole_start, hole_end; 2465 struct drm_mm_node *entry; 2466 int ret; 2467 2468 ret = intel_vgt_balloon(dev_priv); 2469 if (ret) 2470 return ret; 2471 2472 /* Reserve a mappable slot for our lockless error capture */ 2473 ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture, 2474 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 2475 0, ggtt->mappable_end, 2476 DRM_MM_INSERT_LOW); 2477 if (ret) 2478 return ret; 2479 2480 /* Clear any non-preallocated blocks */ 2481 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2482 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2483 hole_start, hole_end); 2484 ggtt->base.clear_range(&ggtt->base, hole_start, 2485 hole_end - hole_start); 2486 } 2487 2488 /* And finally clear the reserved guard page */ 2489 ggtt->base.clear_range(&ggtt->base, 2490 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2491 2492 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2493 ret = i915_gem_init_aliasing_ppgtt(dev_priv); 2494 if (ret) 2495 goto err; 2496 } 2497 2498 return 0; 2499 2500 err: 2501 drm_mm_remove_node(&ggtt->error_capture); 2502 return ret; 2503 } 2504 2505 /** 2506 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2507 * @dev_priv: i915 device 2508 */ 2509 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2510 { 2511 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2512 struct i915_vma *vma, *vn; 2513 2514 ggtt->base.closed = true; 2515 2516 mutex_lock(&dev_priv->drm.struct_mutex); 2517 WARN_ON(!list_empty(&ggtt->base.active_list)); 2518 list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) 2519 WARN_ON(i915_vma_unbind(vma)); 2520 mutex_unlock(&dev_priv->drm.struct_mutex); 2521 2522 i915_gem_cleanup_stolen(&dev_priv->drm); 2523 2524 mutex_lock(&dev_priv->drm.struct_mutex); 2525 i915_gem_fini_aliasing_ppgtt(dev_priv); 2526 2527 if (drm_mm_node_allocated(&ggtt->error_capture)) 2528 drm_mm_remove_node(&ggtt->error_capture); 2529 2530 if (drm_mm_initialized(&ggtt->base.mm)) { 2531 intel_vgt_deballoon(dev_priv); 2532 i915_address_space_fini(&ggtt->base); 2533 } 2534 2535 ggtt->base.cleanup(&ggtt->base); 2536 mutex_unlock(&dev_priv->drm.struct_mutex); 2537 2538 arch_phys_wc_del(ggtt->mtrr); 2539 io_mapping_fini(&ggtt->mappable); 2540 } 2541 2542 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2543 { 2544 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2545 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2546 return snb_gmch_ctl << 20; 2547 } 2548 2549 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2550 { 2551 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2552 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2553 if (bdw_gmch_ctl) 2554 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2555 2556 #ifdef CONFIG_X86_32 2557 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2558 if (bdw_gmch_ctl > 4) 2559 bdw_gmch_ctl = 4; 2560 #endif 2561 2562 return bdw_gmch_ctl << 20; 2563 } 2564 2565 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2566 { 2567 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2568 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2569 2570 if (gmch_ctrl) 2571 return 1 << (20 + gmch_ctrl); 2572 2573 return 0; 2574 } 2575 2576 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2577 { 2578 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2579 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2580 return snb_gmch_ctl << 25; /* 32 MB units */ 2581 } 2582 2583 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2584 { 2585 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2586 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2587 return bdw_gmch_ctl << 25; /* 32 MB units */ 2588 } 2589 2590 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2591 { 2592 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2593 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2594 2595 /* 2596 * 0x0 to 0x10: 32MB increments starting at 0MB 2597 * 0x11 to 0x16: 4MB increments starting at 8MB 2598 * 0x17 to 0x1d: 4MB increments start at 36MB 2599 */ 2600 if (gmch_ctrl < 0x11) 2601 return gmch_ctrl << 25; 2602 else if (gmch_ctrl < 0x17) 2603 return (gmch_ctrl - 0x11 + 2) << 22; 2604 else 2605 return (gmch_ctrl - 0x17 + 9) << 22; 2606 } 2607 2608 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2609 { 2610 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2611 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2612 2613 if (gen9_gmch_ctl < 0xf0) 2614 return gen9_gmch_ctl << 25; /* 32 MB units */ 2615 else 2616 /* 4MB increments starting at 0xf0 for 4MB */ 2617 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2618 } 2619 2620 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2621 { 2622 struct drm_i915_private *dev_priv = ggtt->base.i915; 2623 struct pci_dev *pdev = dev_priv->drm.pdev; 2624 phys_addr_t phys_addr; 2625 int ret; 2626 2627 /* For Modern GENs the PTEs and register space are split in the BAR */ 2628 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2629 2630 /* 2631 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2632 * dropped. For WC mappings in general we have 64 byte burst writes 2633 * when the WC buffer is flushed, so we can't use it, but have to 2634 * resort to an uncached mapping. The WC issue is easily caught by the 2635 * readback check when writing GTT PTE entries. 2636 */ 2637 if (IS_GEN9_LP(dev_priv)) 2638 ggtt->gsm = ioremap_nocache(phys_addr, size); 2639 else 2640 ggtt->gsm = ioremap_wc(phys_addr, size); 2641 if (!ggtt->gsm) { 2642 DRM_ERROR("Failed to map the ggtt page table\n"); 2643 return -ENOMEM; 2644 } 2645 2646 ret = setup_scratch_page(&ggtt->base, GFP_DMA32); 2647 if (ret) { 2648 DRM_ERROR("Scratch setup failed\n"); 2649 /* iounmap will also get called at remove, but meh */ 2650 iounmap(ggtt->gsm); 2651 return ret; 2652 } 2653 2654 return 0; 2655 } 2656 2657 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2658 * bits. When using advanced contexts each context stores its own PAT, but 2659 * writing this data shouldn't be harmful even in those cases. */ 2660 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2661 { 2662 u64 pat; 2663 2664 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2665 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2666 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2667 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2668 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2669 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2670 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2671 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2672 2673 if (!USES_PPGTT(dev_priv)) 2674 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2675 * so RTL will always use the value corresponding to 2676 * pat_sel = 000". 2677 * So let's disable cache for GGTT to avoid screen corruptions. 2678 * MOCS still can be used though. 2679 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2680 * before this patch, i.e. the same uncached + snooping access 2681 * like on gen6/7 seems to be in effect. 2682 * - So this just fixes blitter/render access. Again it looks 2683 * like it's not just uncached access, but uncached + snooping. 2684 * So we can still hold onto all our assumptions wrt cpu 2685 * clflushing on LLC machines. 2686 */ 2687 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2688 2689 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2690 * write would work. */ 2691 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2692 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2693 } 2694 2695 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2696 { 2697 u64 pat; 2698 2699 /* 2700 * Map WB on BDW to snooped on CHV. 2701 * 2702 * Only the snoop bit has meaning for CHV, the rest is 2703 * ignored. 2704 * 2705 * The hardware will never snoop for certain types of accesses: 2706 * - CPU GTT (GMADR->GGTT->no snoop->memory) 2707 * - PPGTT page tables 2708 * - some other special cycles 2709 * 2710 * As with BDW, we also need to consider the following for GT accesses: 2711 * "For GGTT, there is NO pat_sel[2:0] from the entry, 2712 * so RTL will always use the value corresponding to 2713 * pat_sel = 000". 2714 * Which means we must set the snoop bit in PAT entry 0 2715 * in order to keep the global status page working. 2716 */ 2717 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 2718 GEN8_PPAT(1, 0) | 2719 GEN8_PPAT(2, 0) | 2720 GEN8_PPAT(3, 0) | 2721 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 2722 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 2723 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 2724 GEN8_PPAT(7, CHV_PPAT_SNOOP); 2725 2726 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2727 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2728 } 2729 2730 static void gen6_gmch_remove(struct i915_address_space *vm) 2731 { 2732 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2733 2734 iounmap(ggtt->gsm); 2735 cleanup_scratch_page(vm); 2736 } 2737 2738 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 2739 { 2740 struct drm_i915_private *dev_priv = ggtt->base.i915; 2741 struct pci_dev *pdev = dev_priv->drm.pdev; 2742 unsigned int size; 2743 u16 snb_gmch_ctl; 2744 2745 /* TODO: We're not aware of mappable constraints on gen8 yet */ 2746 ggtt->mappable_base = pci_resource_start(pdev, 2); 2747 ggtt->mappable_end = pci_resource_len(pdev, 2); 2748 2749 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 2750 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 2751 2752 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2753 2754 if (INTEL_GEN(dev_priv) >= 9) { 2755 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 2756 size = gen8_get_total_gtt_size(snb_gmch_ctl); 2757 } else if (IS_CHERRYVIEW(dev_priv)) { 2758 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 2759 size = chv_get_total_gtt_size(snb_gmch_ctl); 2760 } else { 2761 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 2762 size = gen8_get_total_gtt_size(snb_gmch_ctl); 2763 } 2764 2765 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 2766 2767 if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 2768 chv_setup_private_ppat(dev_priv); 2769 else 2770 bdw_setup_private_ppat(dev_priv); 2771 2772 ggtt->base.cleanup = gen6_gmch_remove; 2773 ggtt->base.bind_vma = ggtt_bind_vma; 2774 ggtt->base.unbind_vma = ggtt_unbind_vma; 2775 ggtt->base.insert_page = gen8_ggtt_insert_page; 2776 ggtt->base.clear_range = nop_clear_range; 2777 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 2778 ggtt->base.clear_range = gen8_ggtt_clear_range; 2779 2780 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 2781 2782 ggtt->invalidate = gen6_ggtt_invalidate; 2783 2784 return ggtt_probe_common(ggtt, size); 2785 } 2786 2787 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 2788 { 2789 struct drm_i915_private *dev_priv = ggtt->base.i915; 2790 struct pci_dev *pdev = dev_priv->drm.pdev; 2791 unsigned int size; 2792 u16 snb_gmch_ctl; 2793 2794 ggtt->mappable_base = pci_resource_start(pdev, 2); 2795 ggtt->mappable_end = pci_resource_len(pdev, 2); 2796 2797 /* 64/512MB is the current min/max we actually know of, but this is just 2798 * a coarse sanity check. 2799 */ 2800 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 2801 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 2802 return -ENXIO; 2803 } 2804 2805 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 2806 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 2807 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2808 2809 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 2810 2811 size = gen6_get_total_gtt_size(snb_gmch_ctl); 2812 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 2813 2814 ggtt->base.clear_range = gen6_ggtt_clear_range; 2815 ggtt->base.insert_page = gen6_ggtt_insert_page; 2816 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 2817 ggtt->base.bind_vma = ggtt_bind_vma; 2818 ggtt->base.unbind_vma = ggtt_unbind_vma; 2819 ggtt->base.cleanup = gen6_gmch_remove; 2820 2821 ggtt->invalidate = gen6_ggtt_invalidate; 2822 2823 if (HAS_EDRAM(dev_priv)) 2824 ggtt->base.pte_encode = iris_pte_encode; 2825 else if (IS_HASWELL(dev_priv)) 2826 ggtt->base.pte_encode = hsw_pte_encode; 2827 else if (IS_VALLEYVIEW(dev_priv)) 2828 ggtt->base.pte_encode = byt_pte_encode; 2829 else if (INTEL_GEN(dev_priv) >= 7) 2830 ggtt->base.pte_encode = ivb_pte_encode; 2831 else 2832 ggtt->base.pte_encode = snb_pte_encode; 2833 2834 return ggtt_probe_common(ggtt, size); 2835 } 2836 2837 static void i915_gmch_remove(struct i915_address_space *vm) 2838 { 2839 intel_gmch_remove(); 2840 } 2841 2842 static int i915_gmch_probe(struct i915_ggtt *ggtt) 2843 { 2844 struct drm_i915_private *dev_priv = ggtt->base.i915; 2845 int ret; 2846 2847 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 2848 if (!ret) { 2849 DRM_ERROR("failed to set up gmch\n"); 2850 return -EIO; 2851 } 2852 2853 intel_gtt_get(&ggtt->base.total, 2854 &ggtt->stolen_size, 2855 &ggtt->mappable_base, 2856 &ggtt->mappable_end); 2857 2858 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 2859 ggtt->base.insert_page = i915_ggtt_insert_page; 2860 ggtt->base.insert_entries = i915_ggtt_insert_entries; 2861 ggtt->base.clear_range = i915_ggtt_clear_range; 2862 ggtt->base.bind_vma = ggtt_bind_vma; 2863 ggtt->base.unbind_vma = ggtt_unbind_vma; 2864 ggtt->base.cleanup = i915_gmch_remove; 2865 2866 ggtt->invalidate = gmch_ggtt_invalidate; 2867 2868 if (unlikely(ggtt->do_idle_maps)) 2869 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 2870 2871 return 0; 2872 } 2873 2874 /** 2875 * i915_ggtt_probe_hw - Probe GGTT hardware location 2876 * @dev_priv: i915 device 2877 */ 2878 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 2879 { 2880 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2881 int ret; 2882 2883 ggtt->base.i915 = dev_priv; 2884 ggtt->base.dma = &dev_priv->drm.pdev->dev; 2885 2886 if (INTEL_GEN(dev_priv) <= 5) 2887 ret = i915_gmch_probe(ggtt); 2888 else if (INTEL_GEN(dev_priv) < 8) 2889 ret = gen6_gmch_probe(ggtt); 2890 else 2891 ret = gen8_gmch_probe(ggtt); 2892 if (ret) 2893 return ret; 2894 2895 /* Trim the GGTT to fit the GuC mappable upper range (when enabled). 2896 * This is easier than doing range restriction on the fly, as we 2897 * currently don't have any bits spare to pass in this upper 2898 * restriction! 2899 */ 2900 if (HAS_GUC(dev_priv) && i915.enable_guc_loading) { 2901 ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); 2902 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 2903 } 2904 2905 if ((ggtt->base.total - 1) >> 32) { 2906 DRM_ERROR("We never expected a Global GTT with more than 32bits" 2907 " of address space! Found %lldM!\n", 2908 ggtt->base.total >> 20); 2909 ggtt->base.total = 1ULL << 32; 2910 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 2911 } 2912 2913 if (ggtt->mappable_end > ggtt->base.total) { 2914 DRM_ERROR("mappable aperture extends past end of GGTT," 2915 " aperture=%llx, total=%llx\n", 2916 ggtt->mappable_end, ggtt->base.total); 2917 ggtt->mappable_end = ggtt->base.total; 2918 } 2919 2920 /* GMADR is the PCI mmio aperture into the global GTT. */ 2921 DRM_INFO("Memory usable by graphics device = %lluM\n", 2922 ggtt->base.total >> 20); 2923 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 2924 DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); 2925 #ifdef CONFIG_INTEL_IOMMU 2926 if (intel_iommu_gfx_mapped) 2927 DRM_INFO("VT-d active for gfx access\n"); 2928 #endif 2929 2930 return 0; 2931 } 2932 2933 /** 2934 * i915_ggtt_init_hw - Initialize GGTT hardware 2935 * @dev_priv: i915 device 2936 */ 2937 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 2938 { 2939 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2940 int ret; 2941 2942 INIT_LIST_HEAD(&dev_priv->vm_list); 2943 2944 /* Note that we use page colouring to enforce a guard page at the 2945 * end of the address space. This is required as the CS may prefetch 2946 * beyond the end of the batch buffer, across the page boundary, 2947 * and beyond the end of the GTT if we do not provide a guard. 2948 */ 2949 mutex_lock(&dev_priv->drm.struct_mutex); 2950 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 2951 if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) 2952 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 2953 mutex_unlock(&dev_priv->drm.struct_mutex); 2954 2955 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 2956 dev_priv->ggtt.mappable_base, 2957 dev_priv->ggtt.mappable_end)) { 2958 ret = -EIO; 2959 goto out_gtt_cleanup; 2960 } 2961 2962 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 2963 2964 /* 2965 * Initialise stolen early so that we may reserve preallocated 2966 * objects for the BIOS to KMS transition. 2967 */ 2968 ret = i915_gem_init_stolen(dev_priv); 2969 if (ret) 2970 goto out_gtt_cleanup; 2971 2972 return 0; 2973 2974 out_gtt_cleanup: 2975 ggtt->base.cleanup(&ggtt->base); 2976 return ret; 2977 } 2978 2979 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 2980 { 2981 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 2982 return -EIO; 2983 2984 return 0; 2985 } 2986 2987 void i915_ggtt_enable_guc(struct drm_i915_private *i915) 2988 { 2989 i915->ggtt.invalidate = guc_ggtt_invalidate; 2990 } 2991 2992 void i915_ggtt_disable_guc(struct drm_i915_private *i915) 2993 { 2994 i915->ggtt.invalidate = gen6_ggtt_invalidate; 2995 } 2996 2997 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) 2998 { 2999 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3000 struct drm_i915_gem_object *obj, *on; 3001 3002 i915_check_and_clear_faults(dev_priv); 3003 3004 /* First fill our portion of the GTT with scratch pages */ 3005 ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); 3006 3007 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3008 3009 /* clflush objects bound into the GGTT and rebind them. */ 3010 list_for_each_entry_safe(obj, on, 3011 &dev_priv->mm.bound_list, global_link) { 3012 bool ggtt_bound = false; 3013 struct i915_vma *vma; 3014 3015 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3016 if (vma->vm != &ggtt->base) 3017 continue; 3018 3019 if (!i915_vma_unbind(vma)) 3020 continue; 3021 3022 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3023 PIN_UPDATE)); 3024 ggtt_bound = true; 3025 } 3026 3027 if (ggtt_bound) 3028 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3029 } 3030 3031 ggtt->base.closed = false; 3032 3033 if (INTEL_GEN(dev_priv) >= 8) { 3034 if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 3035 chv_setup_private_ppat(dev_priv); 3036 else 3037 bdw_setup_private_ppat(dev_priv); 3038 3039 return; 3040 } 3041 3042 if (USES_PPGTT(dev_priv)) { 3043 struct i915_address_space *vm; 3044 3045 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3046 struct i915_hw_ppgtt *ppgtt; 3047 3048 if (i915_is_ggtt(vm)) 3049 ppgtt = dev_priv->mm.aliasing_ppgtt; 3050 else 3051 ppgtt = i915_vm_to_ppgtt(vm); 3052 3053 gen6_write_page_range(ppgtt, 0, ppgtt->base.total); 3054 } 3055 } 3056 3057 i915_ggtt_invalidate(dev_priv); 3058 } 3059 3060 static struct scatterlist * 3061 rotate_pages(const dma_addr_t *in, unsigned int offset, 3062 unsigned int width, unsigned int height, 3063 unsigned int stride, 3064 struct sg_table *st, struct scatterlist *sg) 3065 { 3066 unsigned int column, row; 3067 unsigned int src_idx; 3068 3069 for (column = 0; column < width; column++) { 3070 src_idx = stride * (height - 1) + column; 3071 for (row = 0; row < height; row++) { 3072 st->nents++; 3073 /* We don't need the pages, but need to initialize 3074 * the entries so the sg list can be happily traversed. 3075 * The only thing we need are DMA addresses. 3076 */ 3077 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3078 sg_dma_address(sg) = in[offset + src_idx]; 3079 sg_dma_len(sg) = PAGE_SIZE; 3080 sg = sg_next(sg); 3081 src_idx -= stride; 3082 } 3083 } 3084 3085 return sg; 3086 } 3087 3088 static noinline struct sg_table * 3089 intel_rotate_pages(struct intel_rotation_info *rot_info, 3090 struct drm_i915_gem_object *obj) 3091 { 3092 const unsigned long n_pages = obj->base.size / PAGE_SIZE; 3093 unsigned int size = intel_rotation_info_size(rot_info); 3094 struct sgt_iter sgt_iter; 3095 dma_addr_t dma_addr; 3096 unsigned long i; 3097 dma_addr_t *page_addr_list; 3098 struct sg_table *st; 3099 struct scatterlist *sg; 3100 int ret = -ENOMEM; 3101 3102 /* Allocate a temporary list of source pages for random access. */ 3103 page_addr_list = drm_malloc_gfp(n_pages, 3104 sizeof(dma_addr_t), 3105 GFP_TEMPORARY); 3106 if (!page_addr_list) 3107 return ERR_PTR(ret); 3108 3109 /* Allocate target SG list. */ 3110 st = kmalloc(sizeof(*st), GFP_KERNEL); 3111 if (!st) 3112 goto err_st_alloc; 3113 3114 ret = sg_alloc_table(st, size, GFP_KERNEL); 3115 if (ret) 3116 goto err_sg_alloc; 3117 3118 /* Populate source page list from the object. */ 3119 i = 0; 3120 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3121 page_addr_list[i++] = dma_addr; 3122 3123 GEM_BUG_ON(i != n_pages); 3124 st->nents = 0; 3125 sg = st->sgl; 3126 3127 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3128 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3129 rot_info->plane[i].width, rot_info->plane[i].height, 3130 rot_info->plane[i].stride, st, sg); 3131 } 3132 3133 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3134 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3135 3136 drm_free_large(page_addr_list); 3137 3138 return st; 3139 3140 err_sg_alloc: 3141 kfree(st); 3142 err_st_alloc: 3143 drm_free_large(page_addr_list); 3144 3145 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3146 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3147 3148 return ERR_PTR(ret); 3149 } 3150 3151 static noinline struct sg_table * 3152 intel_partial_pages(const struct i915_ggtt_view *view, 3153 struct drm_i915_gem_object *obj) 3154 { 3155 struct sg_table *st; 3156 struct scatterlist *sg, *iter; 3157 unsigned int count = view->partial.size; 3158 unsigned int offset; 3159 int ret = -ENOMEM; 3160 3161 st = kmalloc(sizeof(*st), GFP_KERNEL); 3162 if (!st) 3163 goto err_st_alloc; 3164 3165 ret = sg_alloc_table(st, count, GFP_KERNEL); 3166 if (ret) 3167 goto err_sg_alloc; 3168 3169 iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); 3170 GEM_BUG_ON(!iter); 3171 3172 sg = st->sgl; 3173 st->nents = 0; 3174 do { 3175 unsigned int len; 3176 3177 len = min(iter->length - (offset << PAGE_SHIFT), 3178 count << PAGE_SHIFT); 3179 sg_set_page(sg, NULL, len, 0); 3180 sg_dma_address(sg) = 3181 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3182 sg_dma_len(sg) = len; 3183 3184 st->nents++; 3185 count -= len >> PAGE_SHIFT; 3186 if (count == 0) { 3187 sg_mark_end(sg); 3188 return st; 3189 } 3190 3191 sg = __sg_next(sg); 3192 iter = __sg_next(iter); 3193 offset = 0; 3194 } while (1); 3195 3196 err_sg_alloc: 3197 kfree(st); 3198 err_st_alloc: 3199 return ERR_PTR(ret); 3200 } 3201 3202 static int 3203 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3204 { 3205 int ret; 3206 3207 /* The vma->pages are only valid within the lifespan of the borrowed 3208 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so 3209 * must be the vma->pages. A simple rule is that vma->pages must only 3210 * be accessed when the obj->mm.pages are pinned. 3211 */ 3212 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); 3213 3214 switch (vma->ggtt_view.type) { 3215 case I915_GGTT_VIEW_NORMAL: 3216 vma->pages = vma->obj->mm.pages; 3217 return 0; 3218 3219 case I915_GGTT_VIEW_ROTATED: 3220 vma->pages = 3221 intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); 3222 break; 3223 3224 case I915_GGTT_VIEW_PARTIAL: 3225 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3226 break; 3227 3228 default: 3229 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3230 vma->ggtt_view.type); 3231 return -EINVAL; 3232 } 3233 3234 ret = 0; 3235 if (unlikely(IS_ERR(vma->pages))) { 3236 ret = PTR_ERR(vma->pages); 3237 vma->pages = NULL; 3238 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3239 vma->ggtt_view.type, ret); 3240 } 3241 return ret; 3242 } 3243 3244 /** 3245 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT) 3246 * @vm: the &struct i915_address_space 3247 * @node: the &struct drm_mm_node (typically i915_vma.mode) 3248 * @size: how much space to allocate inside the GTT, 3249 * must be #I915_GTT_PAGE_SIZE aligned 3250 * @offset: where to insert inside the GTT, 3251 * must be #I915_GTT_MIN_ALIGNMENT aligned, and the node 3252 * (@offset + @size) must fit within the address space 3253 * @color: color to apply to node, if this node is not from a VMA, 3254 * color must be #I915_COLOR_UNEVICTABLE 3255 * @flags: control search and eviction behaviour 3256 * 3257 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside 3258 * the address space (using @size and @color). If the @node does not fit, it 3259 * tries to evict any overlapping nodes from the GTT, including any 3260 * neighbouring nodes if the colors do not match (to ensure guard pages between 3261 * differing domains). See i915_gem_evict_for_node() for the gory details 3262 * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on 3263 * evicting active overlapping objects, and any overlapping node that is pinned 3264 * or marked as unevictable will also result in failure. 3265 * 3266 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if 3267 * asked to wait for eviction and interrupted. 3268 */ 3269 int i915_gem_gtt_reserve(struct i915_address_space *vm, 3270 struct drm_mm_node *node, 3271 u64 size, u64 offset, unsigned long color, 3272 unsigned int flags) 3273 { 3274 int err; 3275 3276 GEM_BUG_ON(!size); 3277 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 3278 GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); 3279 GEM_BUG_ON(range_overflows(offset, size, vm->total)); 3280 GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); 3281 GEM_BUG_ON(drm_mm_node_allocated(node)); 3282 3283 node->size = size; 3284 node->start = offset; 3285 node->color = color; 3286 3287 err = drm_mm_reserve_node(&vm->mm, node); 3288 if (err != -ENOSPC) 3289 return err; 3290 3291 err = i915_gem_evict_for_node(vm, node, flags); 3292 if (err == 0) 3293 err = drm_mm_reserve_node(&vm->mm, node); 3294 3295 return err; 3296 } 3297 3298 static u64 random_offset(u64 start, u64 end, u64 len, u64 align) 3299 { 3300 u64 range, addr; 3301 3302 GEM_BUG_ON(range_overflows(start, len, end)); 3303 GEM_BUG_ON(round_up(start, align) > round_down(end - len, align)); 3304 3305 range = round_down(end - len, align) - round_up(start, align); 3306 if (range) { 3307 if (sizeof(unsigned long) == sizeof(u64)) { 3308 addr = get_random_long(); 3309 } else { 3310 addr = get_random_int(); 3311 if (range > U32_MAX) { 3312 addr <<= 32; 3313 addr |= get_random_int(); 3314 } 3315 } 3316 div64_u64_rem(addr, range, &addr); 3317 start += addr; 3318 } 3319 3320 return round_up(start, align); 3321 } 3322 3323 /** 3324 * i915_gem_gtt_insert - insert a node into an address_space (GTT) 3325 * @vm: the &struct i915_address_space 3326 * @node: the &struct drm_mm_node (typically i915_vma.node) 3327 * @size: how much space to allocate inside the GTT, 3328 * must be #I915_GTT_PAGE_SIZE aligned 3329 * @alignment: required alignment of starting offset, may be 0 but 3330 * if specified, this must be a power-of-two and at least 3331 * #I915_GTT_MIN_ALIGNMENT 3332 * @color: color to apply to node 3333 * @start: start of any range restriction inside GTT (0 for all), 3334 * must be #I915_GTT_PAGE_SIZE aligned 3335 * @end: end of any range restriction inside GTT (U64_MAX for all), 3336 * must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX 3337 * @flags: control search and eviction behaviour 3338 * 3339 * i915_gem_gtt_insert() first searches for an available hole into which 3340 * is can insert the node. The hole address is aligned to @alignment and 3341 * its @size must then fit entirely within the [@start, @end] bounds. The 3342 * nodes on either side of the hole must match @color, or else a guard page 3343 * will be inserted between the two nodes (or the node evicted). If no 3344 * suitable hole is found, first a victim is randomly selected and tested 3345 * for eviction, otherwise then the LRU list of objects within the GTT 3346 * is scanned to find the first set of replacement nodes to create the hole. 3347 * Those old overlapping nodes are evicted from the GTT (and so must be 3348 * rebound before any future use). Any node that is currently pinned cannot 3349 * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently 3350 * active and #PIN_NONBLOCK is specified, that node is also skipped when 3351 * searching for an eviction candidate. See i915_gem_evict_something() for 3352 * the gory details on the eviction algorithm. 3353 * 3354 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if 3355 * asked to wait for eviction and interrupted. 3356 */ 3357 int i915_gem_gtt_insert(struct i915_address_space *vm, 3358 struct drm_mm_node *node, 3359 u64 size, u64 alignment, unsigned long color, 3360 u64 start, u64 end, unsigned int flags) 3361 { 3362 enum drm_mm_insert_mode mode; 3363 u64 offset; 3364 int err; 3365 3366 lockdep_assert_held(&vm->i915->drm.struct_mutex); 3367 GEM_BUG_ON(!size); 3368 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 3369 GEM_BUG_ON(alignment && !is_power_of_2(alignment)); 3370 GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT)); 3371 GEM_BUG_ON(start >= end); 3372 GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); 3373 GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); 3374 GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); 3375 GEM_BUG_ON(drm_mm_node_allocated(node)); 3376 3377 if (unlikely(range_overflows(start, size, end))) 3378 return -ENOSPC; 3379 3380 if (unlikely(round_up(start, alignment) > round_down(end - size, alignment))) 3381 return -ENOSPC; 3382 3383 mode = DRM_MM_INSERT_BEST; 3384 if (flags & PIN_HIGH) 3385 mode = DRM_MM_INSERT_HIGH; 3386 if (flags & PIN_MAPPABLE) 3387 mode = DRM_MM_INSERT_LOW; 3388 3389 /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, 3390 * so we know that we always have a minimum alignment of 4096. 3391 * The drm_mm range manager is optimised to return results 3392 * with zero alignment, so where possible use the optimal 3393 * path. 3394 */ 3395 BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE); 3396 if (alignment <= I915_GTT_MIN_ALIGNMENT) 3397 alignment = 0; 3398 3399 err = drm_mm_insert_node_in_range(&vm->mm, node, 3400 size, alignment, color, 3401 start, end, mode); 3402 if (err != -ENOSPC) 3403 return err; 3404 3405 /* No free space, pick a slot at random. 3406 * 3407 * There is a pathological case here using a GTT shared between 3408 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt): 3409 * 3410 * |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->| 3411 * (64k objects) (448k objects) 3412 * 3413 * Now imagine that the eviction LRU is ordered top-down (just because 3414 * pathology meets real life), and that we need to evict an object to 3415 * make room inside the aperture. The eviction scan then has to walk 3416 * the 448k list before it finds one within range. And now imagine that 3417 * it has to search for a new hole between every byte inside the memcpy, 3418 * for several simultaneous clients. 3419 * 3420 * On a full-ppgtt system, if we have run out of available space, there 3421 * will be lots and lots of objects in the eviction list! Again, 3422 * searching that LRU list may be slow if we are also applying any 3423 * range restrictions (e.g. restriction to low 4GiB) and so, for 3424 * simplicity and similarilty between different GTT, try the single 3425 * random replacement first. 3426 */ 3427 offset = random_offset(start, end, 3428 size, alignment ?: I915_GTT_MIN_ALIGNMENT); 3429 err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags); 3430 if (err != -ENOSPC) 3431 return err; 3432 3433 /* Randomly selected placement is pinned, do a search */ 3434 err = i915_gem_evict_something(vm, size, alignment, color, 3435 start, end, flags); 3436 if (err) 3437 return err; 3438 3439 return drm_mm_insert_node_in_range(&vm->mm, node, 3440 size, alignment, color, 3441 start, end, DRM_MM_INSERT_EVICT); 3442 } 3443 3444 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 3445 #include "selftests/mock_gtt.c" 3446 #include "selftests/i915_gem_gtt.c" 3447 #endif 3448