/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/log2.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>

#include <asm/set_memory.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view is
 * different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view,
 * a globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
 * and exists for the lifetime of a VMA.
 *
 * Core API is designed to have copy semantics which means that passed in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 */

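/*
 * Illustrative sketch only: a caller wanting a non-normal view typically
 * fills a struct i915_ggtt_view on the stack and passes it by value, relying
 * on the copy semantics described above. The members shown below assume the
 * partial view as defined in i915_gem_gtt.h and may differ per view type:
 *
 *	struct i915_ggtt_view view = {
 *		.type = I915_GGTT_VIEW_PARTIAL,
 *		.partial = { .offset = 0, .size = 1 },
 *	};
 *
 *	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 */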
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	/* Note that as an uncached mmio write, this should flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	gen6_ggtt_invalidate(dev_priv);
	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	intel_gtt_chipset_flush();
}

static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
{
	i915->ggtt.invalidate(i915);
}

int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	if (!dev_priv->info.has_aliasing_ppgtt)
		return 0;

	has_full_ppgtt = dev_priv->info.has_full_ppgtt;
	has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;

	if (intel_vgpu_active(dev_priv)) {
		/* GVT-g has no support for 32bit ppgtt */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
	}

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_vtd_active()) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) {
		if (has_full_48bit_ppgtt)
			return 3;

		if (has_full_ppgtt)
			return 2;
	}

	return 1;
}

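/*
 * Bind a VMA into a ppgtt: on the first (non-local) bind the page-table
 * range backing the node is allocated, then the PTEs are written.
 * PTE_READ_ONLY is currently only honoured on VLV (see byt_pte_encode()).
 */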
static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags;
	int ret;

	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
						 vma->size);
		if (ret)
			return ret;
	}

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ppgtt_set_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(vma->pages);

	vma->pages = vma->obj->mm.pages;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level)
{
	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
				  const enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;
	return pde;
}

#define gen8_pdpe_encode gen8_pde_encode
#define gen8_pml4e_encode gen8_pde_encode

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

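/*
 * Note: which of these pte_encode helpers is used is chosen per platform
 * when the GGTT is probed (vm->pte_encode); the gen6 ppgtt simply inherits
 * the GGTT's choice. Baytrail is the odd one out in needing an explicit
 * writeable bit and a snoop bit, handled below.
 */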
static gen6_pte_t byt_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_pte_t iris_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec *pvec = &vm->free_pages;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	if (likely(pvec->nr))
		return pvec->pages[--pvec->nr];

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* A placeholder for a specific mutex to guard the WC stash */
	lockdep_assert_held(&vm->i915->drm.struct_mutex);

	/* Look in our global stash of WC pages... */
	pvec = &vm->i915->mm.wc_stash;
	if (likely(pvec->nr))
		return pvec->pages[--pvec->nr];

	/* Otherwise batch allocate pages to amortize cost of set_pages_wc. */
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		pvec->pages[pvec->nr++] = page;
	} while (pagevec_space(pvec));

	if (unlikely(!pvec->nr))
		return NULL;

	set_pages_array_wc(pvec->pages, pvec->nr);

	return pvec->pages[--pvec->nr];
}

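/*
 * Freed page-table pages are not handed back one at a time; they are batched
 * in vm->free_pages and released via vm_free_pages_release(). Write-combined
 * page-table pages are first returned to the global WC stash
 * (i915->mm.wc_stash) so the expensive WC<->WB conversions are amortized.
 */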
static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages;

	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		struct pagevec *stash = &vm->i915->mm.wc_stash;

		/* When we use WC, first fill up the global stash and then
		 * only if full immediately free the overflow.
		 */

		lockdep_assert_held(&vm->i915->drm.struct_mutex);
		if (pagevec_space(stash)) {
			do {
				stash->pages[stash->nr++] =
					pvec->pages[--pvec->nr];
				if (!pvec->nr)
					return;
			} while (pagevec_space(stash));

			/* As we have made some room in the VM's free_pages,
			 * we can wait for it to fill again. Unless we are
			 * inside i915_address_space_fini() and must
			 * immediately release the pages!
			 */
			if (!immediate)
				return;
		}

		set_pages_array_wb(pvec->pages, pvec->nr);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	if (!pagevec_add(&vm->free_pages, page))
		vm_free_pages_release(vm, false);
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}

static int setup_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, I915_GFP_DMA);
}

static void cleanup_page_dma(struct i915_address_space *vm,
			     struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)

#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v))
#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v))

static void fill_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p,
			  const u64 val)
{
	u64 * const vaddr = kmap_atomic(p->page);

	memset64(vaddr, val, PAGE_SIZE / sizeof(val));

	kunmap_atomic(vaddr);
}

static void fill_page_dma_32(struct i915_address_space *vm,
			     struct i915_page_dma *p,
			     const u32 v)
{
	fill_page_dma(vm, p, (u64)v << 32 | v);
}

static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct page *page = NULL;
	dma_addr_t addr;
	int order;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert().
	 *
	 * TODO: we should really consider write-protecting the scratch-page and
	 * sharing between ppgtt
	 */
	if (i915_vm_is_48bit(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		order = get_order(I915_GTT_PAGE_SIZE_64K);
		page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order);
		if (page) {
			addr = dma_map_page(vm->dma, page, 0,
					    I915_GTT_PAGE_SIZE_64K,
					    PCI_DMA_BIDIRECTIONAL);
			if (unlikely(dma_mapping_error(vm->dma, addr))) {
				__free_pages(page, order);
				page = NULL;
			}

			if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
				dma_unmap_page(vm->dma, addr,
					       I915_GTT_PAGE_SIZE_64K,
					       PCI_DMA_BIDIRECTIONAL);
				__free_pages(page, order);
				page = NULL;
			}
		}
	}

	if (!page) {
		order = 0;
		page = alloc_page(gfp | __GFP_ZERO);
		if (unlikely(!page))
			return -ENOMEM;

		addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
				    PCI_DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(vm->dma, addr))) {
			__free_page(page);
			return -ENOMEM;
		}
	}

	vm->scratch_page.page = page;
	vm->scratch_page.daddr = addr;
	vm->scratch_page.order = order;

	return 0;
}

static void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = &vm->scratch_page;

	dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, p->order);
}

static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
{
	struct i915_page_table *pt;

	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pt))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pt))) {
		kfree(pt);
		return ERR_PTR(-ENOMEM);
	}

	pt->used_ptes = 0;
	return pt;
}

static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
{
	cleanup_px(vm, pt);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill_px(vm, pt,
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill32_px(vm, pt,
		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
}

static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
	struct i915_page_directory *pd;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pd))) {
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	pd->used_pdes = 0;
	return pd;
}

static void free_pd(struct i915_address_space *vm,
		    struct i915_page_directory *pd)
{
	cleanup_px(vm, pd);
	kfree(pd);
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	unsigned int i;

	fill_px(vm, pd,
		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
	for (i = 0; i < I915_PDES; i++)
		pd->page_table[i] = vm->scratch_pt;
}

static int __pdp_init(struct i915_address_space *vm,
		      struct i915_page_directory_pointer *pdp)
{
	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
	unsigned int i;

	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
					    GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pdp->page_directory))
		return -ENOMEM;

	for (i = 0; i < pdpes; i++)
		pdp->page_directory[i] = vm->scratch_pd;

	return 0;
}

static void __pdp_fini(struct i915_page_directory_pointer *pdp)
{
	kfree(pdp->page_directory);
	pdp->page_directory = NULL;
}

static inline bool use_4lvl(const struct i915_address_space *vm)
{
	return i915_vm_is_48bit(vm);
}

static struct i915_page_directory_pointer *
alloc_pdp(struct i915_address_space *vm)
{
	struct i915_page_directory_pointer *pdp;
	int ret = -ENOMEM;

	WARN_ON(!use_4lvl(vm));

	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
	if (!pdp)
		return ERR_PTR(-ENOMEM);

	ret = __pdp_init(vm, pdp);
	if (ret)
		goto fail_bitmap;

	ret = setup_px(vm, pdp);
	if (ret)
		goto fail_page_m;

	return pdp;

fail_page_m:
	__pdp_fini(pdp);
fail_bitmap:
	kfree(pdp);

	return ERR_PTR(ret);
}

static void free_pdp(struct i915_address_space *vm,
		     struct i915_page_directory_pointer *pdp)
{
	__pdp_fini(pdp);

	if (!use_4lvl(vm))
		return;

	cleanup_px(vm, pdp);
	kfree(pdp);
}

static void gen8_initialize_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp)
{
	gen8_ppgtt_pdpe_t scratch_pdpe;

	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);

	fill_px(vm, pdp, scratch_pdpe);
}

static void gen8_initialize_pml4(struct i915_address_space *vm,
				 struct i915_pml4 *pml4)
{
	unsigned int i;

	fill_px(vm, pml4,
		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
		pml4->pdps[i] = vm->scratch_pdp;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req,
			  unsigned entry,
			  dma_addr_t addr)
{
	struct intel_engine_cs *engine = req->engine;
	u32 *cs;

	BUG_ON(entry >= 4);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
	*cs++ = upper_32_bits(addr);
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
	*cs++ = lower_32_bits(addr);
	intel_ring_advance(req, cs);

	return 0;
}

static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
			       struct drm_i915_gem_request *req)
{
	int i, ret;

	for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		ret = gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
			       struct drm_i915_gem_request *req)
{
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
}

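/*
 * The gen8 clear_* helpers below cascade from the leaves upwards: each level
 * reports (via its return value or usage count) when it has become entirely
 * scratch-backed, which lets the caller swap the corresponding entry back to
 * scratch and free the now-empty structure.
 */
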
/* Removes entries from a single page table, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries.
 */
static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
				struct i915_page_table *pt,
				u64 start, u64 length)
{
	unsigned int num_entries = gen8_pte_count(start, length);
	unsigned int pte = gen8_pte_index(start);
	unsigned int pte_end = pte + num_entries;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	gen8_pte_t *vaddr;

	GEM_BUG_ON(num_entries > pt->used_ptes);

	pt->used_ptes -= num_entries;
	if (!pt->used_ptes)
		return true;

	vaddr = kmap_atomic_px(pt);
	while (pte < pte_end)
		vaddr[pte++] = scratch_pte;
	kunmap_atomic(vaddr);

	return false;
}

static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       struct i915_page_table *pt,
			       unsigned int pde)
{
	gen8_pde_t *vaddr;

	pd->page_table[pde] = pt;

	vaddr = kmap_atomic_px(pd);
	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
				struct i915_page_directory *pd,
				u64 start, u64 length)
{
	struct i915_page_table *pt;
	u32 pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		GEM_BUG_ON(pt == vm->scratch_pt);

		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
			continue;

		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
		GEM_BUG_ON(!pd->used_pdes);
		pd->used_pdes--;

		free_pt(vm, pt);
	}

	return !pd->used_pdes;
}

static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				struct i915_page_directory *pd,
				unsigned int pdpe)
{
	gen8_ppgtt_pdpe_t *vaddr;

	pdp->page_directory[pdpe] = pd;
	if (!use_4lvl(vm))
		return;

	vaddr = kmap_atomic_px(pdp);
	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

/* Removes entries from a single page dir pointer, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries.
 */
static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
				 struct i915_page_directory_pointer *pdp,
				 u64 start, u64 length)
{
	struct i915_page_directory *pd;
	unsigned int pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		GEM_BUG_ON(pd == vm->scratch_pd);

		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
			continue;

		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		GEM_BUG_ON(!pdp->used_pdpes);
		pdp->used_pdpes--;

		free_pd(vm, pd);
	}

	return !pdp->used_pdpes;
}

static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
}

static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
				 struct i915_page_directory_pointer *pdp,
				 unsigned int pml4e)
{
	gen8_ppgtt_pml4e_t *vaddr;

	pml4->pdps[pml4e] = pdp;

	vaddr = kmap_atomic_px(pml4);
	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

/* Removes entries from a single pml4.
 * This is the top-level structure in 4-level page tables used on gen8+.
 * Empty entries are always scratch pml4e.
 */
static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_pml4 *pml4 = &ppgtt->pml4;
	struct i915_page_directory_pointer *pdp;
	unsigned int pml4e;

	GEM_BUG_ON(!use_4lvl(vm));

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		GEM_BUG_ON(pdp == vm->scratch_pdp);

		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
			continue;

		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);

		free_pdp(vm, pdp);
	}
}

static inline struct sgt_dma {
	struct scatterlist *sg;
	dma_addr_t dma, max;
} sgt_dma(struct i915_vma *vma) {
	struct scatterlist *sg = vma->pages->sgl;
	dma_addr_t addr = sg_dma_address(sg);
	return (struct sgt_dma) { sg, addr, addr + sg->length };
}

struct gen8_insert_pte {
	u16 pml4e;
	u16 pdpe;
	u16 pde;
	u16 pte;
};

static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
{
	return (struct gen8_insert_pte) {
		gen8_pml4e_index(start),
		gen8_pdpe_index(start),
		gen8_pde_index(start),
		gen8_pte_index(start),
	};
}

static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
			      struct i915_page_directory_pointer *pdp,
			      struct sgt_dma *iter,
			      struct gen8_insert_pte *idx,
			      enum i915_cache_level cache_level)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	gen8_pte_t *vaddr;
	bool ret;

	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
	pd = pdp->page_directory[idx->pdpe];
	vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
	do {
		vaddr[idx->pte] = pte_encode | iter->dma;

		iter->dma += PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg) {
				ret = false;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + iter->sg->length;
		}

		if (++idx->pte == GEN8_PTES) {
			idx->pte = 0;

			if (++idx->pde == I915_PDES) {
				idx->pde = 0;

				/* Limited by sg length for 3lvl */
				if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
					idx->pdpe = 0;
					ret = true;
					break;
				}

				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
				pd = pdp->page_directory[idx->pdpe];
			}

			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
		}
	} while (1);
	kunmap_atomic(vaddr);

	return ret;
}

static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma);
	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);

	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
				      cache_level);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

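/*
 * Huge-page insertion: where the backing store provides suitably sized and
 * aligned chunks, whole PDEs are written as 2M entries, and page tables that
 * end up uniformly filled with 64K-aligned entries are marked with the 64K
 * intermediate page size (IPS) bit instead of being left as plain 4K PTEs.
 */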
static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
					   struct i915_page_directory_pointer **pdps,
					   struct sgt_dma *iter,
					   enum i915_cache_level cache_level)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	u64 start = vma->node.start;
	dma_addr_t rem = iter->sg->length;

	do {
		struct gen8_insert_pte idx = gen8_insert_pte(start);
		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
		unsigned int page_size;
		bool maybe_64K = false;
		gen8_pte_t encode = pte_encode;
		gen8_pte_t *vaddr;
		u16 index, max;

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
			index = idx.pde;
			max = I915_PDES;
			page_size = I915_GTT_PAGE_SIZE_2M;

			encode |= GEN8_PDE_PS_2M;

			vaddr = kmap_atomic_px(pd);
		} else {
			struct i915_page_table *pt = pd->page_table[idx.pde];

			index = idx.pte;
			max = GEN8_PTES;
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (max - index) << PAGE_SHIFT))
				maybe_64K = true;

			vaddr = kmap_atomic_px(pt);
		}

		do {
			GEM_BUG_ON(iter->sg->length < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = iter->sg->length;
				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K && index < max &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (max - index) << PAGE_SHIFT)))
					maybe_64K = false;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		kunmap_atomic(vaddr);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled the whole page-table with 64K entries, or filled part
		 * of it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K &&
		    (index == max ||
		     (i915_vm_has_scratch_64K(vma->vm) &&
		      !iter->sg && IS_ALIGNED(vma->node.start +
					      vma->node.size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = kmap_atomic_px(pd);
			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
			kunmap_atomic(vaddr);
			page_size = I915_GTT_PAGE_SIZE_64K;
		}

		vma->page_sizes.gtt |= page_size;
	} while (iter->sg);
}

static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma);
	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;

	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
	} else {
		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);

		while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
						     &iter, &idx, cache_level))
			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);

		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
	}
}

static void gen8_free_page_tables(struct i915_address_space *vm,
				  struct i915_page_directory *pd)
{
	int i;

	if (!px_page(pd))
		return;

	for (i = 0; i < I915_PDES; i++) {
		if (pd->page_table[i] != vm->scratch_pt)
			free_pt(vm, pd->page_table[i]);
	}
}

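/*
 * Scratch setup: every level of the page-table hierarchy has a scratch
 * structure whose entries all point one level down, ending at the scratch
 * page itself. Unused ranges of an address space therefore always decode to
 * the scratch page rather than faulting.
 */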
static int gen8_init_scratch(struct i915_address_space *vm)
{
	int ret;

	ret = setup_scratch_page(vm, I915_GFP_DMA);
	if (ret)
		return ret;

	vm->scratch_pt = alloc_pt(vm);
	if (IS_ERR(vm->scratch_pt)) {
		ret = PTR_ERR(vm->scratch_pt);
		goto free_scratch_page;
	}

	vm->scratch_pd = alloc_pd(vm);
	if (IS_ERR(vm->scratch_pd)) {
		ret = PTR_ERR(vm->scratch_pd);
		goto free_pt;
	}

	if (use_4lvl(vm)) {
		vm->scratch_pdp = alloc_pdp(vm);
		if (IS_ERR(vm->scratch_pdp)) {
			ret = PTR_ERR(vm->scratch_pdp);
			goto free_pd;
		}
	}

	gen8_initialize_pt(vm, vm->scratch_pt);
	gen8_initialize_pd(vm, vm->scratch_pd);
	if (use_4lvl(vm))
		gen8_initialize_pdp(vm, vm->scratch_pdp);

	return 0;

free_pd:
	free_pd(vm, vm->scratch_pd);
free_pt:
	free_pt(vm, vm->scratch_pt);
free_scratch_page:
	cleanup_scratch_page(vm);

	return ret;
}

static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_i915_private *dev_priv = vm->i915;
	enum vgt_g2v_type msg;
	int i;

	if (use_4lvl(vm)) {
		const u64 daddr = px_dma(&ppgtt->pml4);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
		}

		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
	}

	I915_WRITE(vgtif_reg(g2v_notify), msg);

	return 0;
}

static void gen8_free_scratch(struct i915_address_space *vm)
{
	if (use_4lvl(vm))
		free_pdp(vm, vm->scratch_pdp);
	free_pd(vm, vm->scratch_pd);
	free_pt(vm, vm->scratch_pt);
	cleanup_scratch_page(vm);
}

static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
				    struct i915_page_directory_pointer *pdp)
{
	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
	int i;

	for (i = 0; i < pdpes; i++) {
		if (pdp->page_directory[i] == vm->scratch_pd)
			continue;

		gen8_free_page_tables(vm, pdp->page_directory[i]);
		free_pd(vm, pdp->page_directory[i]);
	}

	free_pdp(vm, pdp);
}

static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
		if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
			continue;

		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
	}

	cleanup_px(&ppgtt->base, &ppgtt->pml4);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct drm_i915_private *dev_priv = vm->i915;
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(dev_priv))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (use_4lvl(vm))
		gen8_ppgtt_cleanup_4lvl(ppgtt);
	else
		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);

	gen8_free_scratch(vm);
}

static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       u64 start, u64 length)
{
	struct i915_page_table *pt;
	u64 from = start;
	unsigned int pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		int count = gen8_pte_count(start, length);

		if (pt == vm->scratch_pt) {
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto unwind;

			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
				gen8_initialize_pt(vm, pt);

			gen8_ppgtt_set_pde(vm, pd, pt, pde);
			pd->used_pdes++;
			GEM_BUG_ON(pd->used_pdes > I915_PDES);
		}

		pt->used_ptes += count;
	}
	return 0;

unwind:
	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
	return -ENOMEM;
}

static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				u64 start, u64 length)
{
	struct i915_page_directory *pd;
	u64 from = start;
	unsigned int pdpe;
	int ret;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (pd == vm->scratch_pd) {
			pd = alloc_pd(vm);
			if (IS_ERR(pd))
				goto unwind;

			gen8_initialize_pd(vm, pd);
			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
			pdp->used_pdpes++;
			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));

			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
		}

		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
		if (unlikely(ret))
			goto unwind_pd;
	}

	return 0;

unwind_pd:
	if (!pd->used_pdes) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		GEM_BUG_ON(!pdp->used_pdpes);
		pdp->used_pdpes--;
		free_pd(vm, pd);
	}
unwind:
	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
	return -ENOMEM;
}

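/*
 * Rough capacity sketch for the gen8 layout (assuming 4K GTT pages): a page
 * table holds 512 PTEs covering 2M, a page directory covers 1G, a PDP covers
 * 512G, and the PML4 of a 48b address space covers 256T. The legacy 3-level
 * layout is limited to the 4 PDP entries programmed via gen8_write_pdp(),
 * i.e. 4G.
 */
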
static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
				 u64 start, u64 length)
{
	return gen8_ppgtt_alloc_pdp(vm,
				    &i915_vm_to_ppgtt(vm)->pdp, start, length);
}

static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
				 u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_pml4 *pml4 = &ppgtt->pml4;
	struct i915_page_directory_pointer *pdp;
	u64 from = start;
	u32 pml4e;
	int ret;

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
			pdp = alloc_pdp(vm);
			if (IS_ERR(pdp))
				goto unwind;

			gen8_initialize_pdp(vm, pdp);
			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
		}

		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
		if (unlikely(ret))
			goto unwind_pdp;
	}

	return 0;

unwind_pdp:
	if (!pdp->used_pdpes) {
		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
		free_pdp(vm, pdp);
	}
unwind:
	gen8_ppgtt_clear_4lvl(vm, from, start - from);
	return -ENOMEM;
}

static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
			  struct i915_page_directory_pointer *pdp,
			  u64 start, u64 length,
			  gen8_pte_t scratch_pte,
			  struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_directory *pd;
	u32 pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		struct i915_page_table *pt;
		u64 pd_len = length;
		u64 pd_start = start;
		u32 pde;

		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
			continue;

		seq_printf(m, "\tPDPE #%d\n", pdpe);
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
			u32 pte;
			gen8_pte_t *pt_vaddr;

			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
				continue;

			pt_vaddr = kmap_atomic_px(pt);
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
				u64 va = (pdpe << GEN8_PDPE_SHIFT |
					  pde << GEN8_PDE_SHIFT |
					  pte << GEN8_PTE_SHIFT);
				int i;
				bool found = false;

				for (i = 0; i < 4; i++)
					if (pt_vaddr[pte + i] != scratch_pte)
						found = true;
				if (!found)
					continue;

				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
				for (i = 0; i < 4; i++) {
					if (pt_vaddr[pte + i] != scratch_pte)
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
					else
						seq_puts(m, " SCRATCH ");
				}
				seq_puts(m, "\n");
			}
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	u64 start = 0, length = ppgtt->base.total;

	if (use_4lvl(vm)) {
		u64 pml4e;
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
			if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
				continue;

			seq_printf(m, " PML4E #%llu\n", pml4e);
			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
		}
	} else {
		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
	}
}

static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
	struct i915_page_directory *pd;
	u64 start = 0, length = ppgtt->base.total;
	u64 from = start;
	unsigned int pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		pd = alloc_pd(vm);
		if (IS_ERR(pd))
			goto unwind;

		gen8_initialize_pd(vm, pd);
		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
		pdp->used_pdpes++;
	}

	pdp->used_pdpes++; /* never remove */
	return 0;

unwind:
	start -= from;
	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		free_pd(vm, pd);
	}
	pdp->used_pdpes = 0;
	return -ENOMEM;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP entry covers 1GB of memory; 4 * 512 * 512 * 4096 = 4GB, i.e. the legacy
 * 32b address space.
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_i915_private *dev_priv = vm->i915;
	int ret;

	ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
		1ULL << 48 :
		1ULL << 32;

	/* There are only a few exceptions for gen >= 6: chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
		ppgtt->base.pt_kmap_wc = true;

	ret = gen8_init_scratch(&ppgtt->base);
	if (ret) {
		ppgtt->base.total = 0;
		return ret;
	}

	if (use_4lvl(vm)) {
		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
		if (ret)
			goto free_scratch;

		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);

		ppgtt->switch_mm = gen8_mm_switch_4lvl;
		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
	} else {
		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
		if (ret)
			goto free_scratch;

		if (intel_vgpu_active(dev_priv)) {
			ret = gen8_preallocate_top_level_pdp(ppgtt);
			if (ret) {
				__pdp_fini(&ppgtt->pdp);
				goto free_scratch;
			}
		}

		ppgtt->switch_mm = gen8_mm_switch_3lvl;
		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
	}

	if (intel_vgpu_active(dev_priv))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->base.set_pages = ppgtt_set_pages;
	ppgtt->base.clear_pages = clear_pages;
	ppgtt->debug_dump = gen8_dump_ppgtt;

	return 0;

free_scratch:
	gen8_free_scratch(&ppgtt->base);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_table *unused;
	gen6_pte_t scratch_pte;
	u32 pd_entry, pte, pde;
	u32 start = 0, length = ppgtt->base.total;

	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
				     I915_CACHE_LLC, 0);

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
		u32 expected;
		gen6_pte_t *pt_vaddr;
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
		pd_entry = readl(ppgtt->pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);

		for (pte = 0; pte < GEN6_PTES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * GEN6_PTES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, " SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		       ppgtt->pd_addr + pde);
}

/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories.
 */
static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
				  u32 start, u32 length)
{
	struct i915_page_table *pt;
	unsigned int pde;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mark_tlbs_dirty(ppgtt);
	wmb();
}

static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
	return ppgtt->pd.base.ggtt_offset << 10;
}

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	u32 *cs;

	/* NB: TLBs must be flushed and invalidated before a switch */
	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(2);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
	*cs++ = PP_DIR_DCLV_2G;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
	*cs++ = get_pd_offset(ppgtt);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	u32 *cs;

	/* NB: TLBs must be flushed and invalidated before a switch */
	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(2);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
	*cs++ = PP_DIR_DCLV_2G;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
	*cs++ = get_pd_offset(ppgtt);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);

	return 0;
}

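/*
 * Unlike the hsw/gen7 variants above, which load PP_DIR_BASE from the ring
 * with MI_LOAD_REGISTER_IMM, this path (gen6 and vgpu) pokes the registers
 * directly through mmio.
 */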
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *engine = req->engine;
	struct drm_i915_private *dev_priv = req->i915;

	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
	return 0;
}

static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, dev_priv, id) {
		u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
				 GEN8_GFX_PPGTT_48B : 0;
		I915_WRITE(RING_MODE_GEN7(engine),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
	}
}

static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	u32 ecochk, ecobits;
	enum intel_engine_id id;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev_priv)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_engine(engine, dev_priv, id) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(engine),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
{
	u32 ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	unsigned int first_entry = start >> PAGE_SHIFT;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte =
		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);

	while (num_entries) {
		struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
		unsigned int end = min(pte + num_entries, GEN6_PTES);
		gen6_pte_t *vaddr;

		num_entries -= end - pte;

		/* Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		do {
			vaddr[pte++] = scratch_pte;
		} while (pte < end);
		kunmap_atomic(vaddr);

		pte = 0;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	unsigned first_entry = vma->node.start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
	do {
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_table *pt;
	u64 from = start;
	unsigned int pde;
	bool flush = false;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
		if (pt == vm->scratch_pt) {
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto unwind_out;

			gen6_initialize_pt(vm, pt);
			ppgtt->pd.page_table[pde] = pt;
			gen6_write_pde(ppgtt, pde, pt);
			flush = true;
		}
	}

	if (flush) {
		mark_tlbs_dirty(ppgtt);
		wmb();
	}

	return 0;

unwind_out:
	gen6_ppgtt_clear_range(vm, from, start);
	return -ENOMEM;
}

static int gen6_init_scratch(struct i915_address_space *vm)
{
	int ret;

	ret = setup_scratch_page(vm, I915_GFP_DMA);
	if (ret)
		return ret;

	vm->scratch_pt = alloc_pt(vm);
	if (IS_ERR(vm->scratch_pt)) {
		cleanup_scratch_page(vm);
		return PTR_ERR(vm->scratch_pt);
	}

	gen6_initialize_pt(vm, vm->scratch_pt);

	return 0;
}

static void gen6_free_scratch(struct i915_address_space *vm)
{
	free_pt(vm, vm->scratch_pt);
	cleanup_scratch_page(vm);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory *pd = &ppgtt->pd;
	struct i915_page_table *pt;
	u32 pde;

	drm_mm_remove_node(&ppgtt->node);

	gen6_for_all_pdes(pt, pd, pde)
		if (pt != vm->scratch_pt)
			free_pt(vm, pt);

	gen6_free_scratch(vm);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&ggtt->base.mm));

	ret = gen6_init_scratch(vm);
	if (ret)
		return ret;

	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
				  I915_COLOR_UNEVICTABLE,
				  0, ggtt->base.total,
				  PIN_HIGH);
	if (ret)
		goto err_out;

	if (ppgtt->node.start < ggtt->mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->pd.base.ggtt_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);

	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);

	return 0;

err_out:
	gen6_free_scratch(vm);
	return ret;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	return gen6_ppgtt_allocate_page_directories(ppgtt);
}

static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  u64 start, u64 length)
{
	struct i915_page_table *unused;
	u32 pde;

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	ppgtt->base.pte_encode = ggtt->base.pte_encode;
	if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
		ppgtt->switch_mm = gen6_mm_switch;
	else if (IS_HASWELL(dev_priv))
		ppgtt->switch_mm = hsw_mm_switch;
	else if (IS_GEN7(dev_priv))
		ppgtt->switch_mm = gen7_mm_switch;
	else
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;

	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
	gen6_write_page_range(ppgtt, 0, ppgtt->base.total);

	ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
	if (ret) {
		gen6_ppgtt_cleanup(&ppgtt->base);
		return ret;
	}

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->base.set_pages = ppgtt_set_pages;
	ppgtt->base.clear_pages = clear_pages;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
			 ppgtt->pd.base.ggtt_offset << 10);

	return 0;
}

static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
			   struct drm_i915_private *dev_priv)
{
	ppgtt->base.i915 = dev_priv;
	ppgtt->base.dma = &dev_priv->drm.pdev->dev;

	if (INTEL_INFO(dev_priv)->gen < 8)
		return gen6_ppgtt_init(ppgtt);
	else
		return gen8_ppgtt_init(ppgtt);
}

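/*
 * Common address-space setup shared by the GGTT and ppgtts: a GEM timeline,
 * the drm_mm range manager for the VA space, the active/inactive/unbound VMA
 * lists and the per-vm free-page batch, plus registration on the global
 * vm_list.
 */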
list_add_tail(&vm->global_link, &dev_priv->vm_list); 2103 pagevec_init(&vm->free_pages); 2104 } 2105 2106 static void i915_address_space_fini(struct i915_address_space *vm) 2107 { 2108 if (pagevec_count(&vm->free_pages)) 2109 vm_free_pages_release(vm, true); 2110 2111 i915_gem_timeline_fini(&vm->timeline); 2112 drm_mm_takedown(&vm->mm); 2113 list_del(&vm->global_link); 2114 } 2115 2116 static void gtt_write_workarounds(struct drm_i915_private *dev_priv) 2117 { 2118 /* This function is for gtt related workarounds. This function is 2119 * called on driver load and after a GPU reset, so you can place 2120 * workarounds here even if they get overwritten by GPU reset. 2121 */ 2122 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ 2123 if (IS_BROADWELL(dev_priv)) 2124 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2125 else if (IS_CHERRYVIEW(dev_priv)) 2126 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2127 else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) 2128 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2129 else if (IS_GEN9_LP(dev_priv)) 2130 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2131 2132 /* 2133 * To support 64K PTEs we need to first enable the use of the 2134 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical 2135 * mmio, otherwise the page-walker will simply ignore the IPS bit. This 2136 * shouldn't be needed after GEN10. 2137 * 2138 * 64K pages were first introduced from BDW+, although technically they 2139 * only *work* from gen9+. For pre-BDW we instead have the option for 2140 * 32K pages, but we don't currently have any support for it in our 2141 * driver. 2142 */ 2143 if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) && 2144 INTEL_GEN(dev_priv) <= 10) 2145 I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA, 2146 I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) | 2147 GAMW_ECO_ENABLE_64K_IPS_FIELD); 2148 } 2149 2150 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) 2151 { 2152 gtt_write_workarounds(dev_priv); 2153 2154 /* In the case of execlists, PPGTT is enabled by the context descriptor 2155 * and the PDPs are contained within the context itself. We don't 2156 * need to do anything here. 
 */
        if (i915_modparams.enable_execlists)
                return 0;

        if (!USES_PPGTT(dev_priv))
                return 0;

        if (IS_GEN6(dev_priv))
                gen6_ppgtt_enable(dev_priv);
        else if (IS_GEN7(dev_priv))
                gen7_ppgtt_enable(dev_priv);
        else if (INTEL_GEN(dev_priv) >= 8)
                gen8_ppgtt_enable(dev_priv);
        else
                MISSING_CASE(INTEL_GEN(dev_priv));

        return 0;
}

struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_i915_private *dev_priv,
                  struct drm_i915_file_private *fpriv,
                  const char *name)
{
        struct i915_hw_ppgtt *ppgtt;
        int ret;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return ERR_PTR(-ENOMEM);

        ret = __hw_ppgtt_init(ppgtt, dev_priv);
        if (ret) {
                kfree(ppgtt);
                return ERR_PTR(ret);
        }

        kref_init(&ppgtt->ref);
        i915_address_space_init(&ppgtt->base, dev_priv, name);
        ppgtt->base.file = fpriv;

        trace_i915_ppgtt_create(&ppgtt->base);

        return ppgtt;
}

void i915_ppgtt_close(struct i915_address_space *vm)
{
        struct list_head *phases[] = {
                &vm->active_list,
                &vm->inactive_list,
                &vm->unbound_list,
                NULL,
        }, **phase;

        GEM_BUG_ON(vm->closed);
        vm->closed = true;

        for (phase = phases; *phase; phase++) {
                struct i915_vma *vma, *vn;

                list_for_each_entry_safe(vma, vn, *phase, vm_link)
                        if (!i915_vma_is_closed(vma))
                                i915_vma_close(vma);
        }
}

void i915_ppgtt_release(struct kref *kref)
{
        struct i915_hw_ppgtt *ppgtt =
                container_of(kref, struct i915_hw_ppgtt, ref);

        trace_i915_ppgtt_release(&ppgtt->base);

        /* vmas should already be unbound and destroyed */
        WARN_ON(!list_empty(&ppgtt->base.active_list));
        WARN_ON(!list_empty(&ppgtt->base.inactive_list));
        WARN_ON(!list_empty(&ppgtt->base.unbound_list));

        ppgtt->base.cleanup(&ppgtt->base);
        i915_address_space_fini(&ppgtt->base);
        kfree(ppgtt);
}

/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *dev_priv)
{
        /* Query intel_iommu to see if we need the workaround. Presumably that
         * was loaded first.
         */
        return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
}

void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *engine;
        enum intel_engine_id id;

        if (INTEL_INFO(dev_priv)->gen < 6)
                return;

        for_each_engine(engine, dev_priv, id) {
                u32 fault_reg;
                fault_reg = I915_READ(RING_FAULT_REG(engine));
                if (fault_reg & RING_FAULT_VALID) {
                        DRM_DEBUG_DRIVER("Unexpected fault\n"
                                         "\tAddr: 0x%08lx\n"
                                         "\tAddress space: %s\n"
                                         "\tSource ID: %d\n"
                                         "\tType: %d\n",
                                         fault_reg & PAGE_MASK,
                                         fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
                                         RING_FAULT_SRCID(fault_reg),
                                         RING_FAULT_FAULT_TYPE(fault_reg));
                        I915_WRITE(RING_FAULT_REG(engine),
                                   fault_reg & ~RING_FAULT_VALID);
                }
        }

        /* Engine specific init may not have been done till this point.
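         * Only peek at the RCS fault register if that engine has actually
         * been set up; the posting read below merely flushes the
         * fault-clearing writes issued above.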
*/ 2278 if (dev_priv->engine[RCS]) 2279 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2280 } 2281 2282 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) 2283 { 2284 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2285 2286 /* Don't bother messing with faults pre GEN6 as we have little 2287 * documentation supporting that it's a good idea. 2288 */ 2289 if (INTEL_GEN(dev_priv) < 6) 2290 return; 2291 2292 i915_check_and_clear_faults(dev_priv); 2293 2294 ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); 2295 2296 i915_ggtt_invalidate(dev_priv); 2297 } 2298 2299 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2300 struct sg_table *pages) 2301 { 2302 do { 2303 if (dma_map_sg(&obj->base.dev->pdev->dev, 2304 pages->sgl, pages->nents, 2305 PCI_DMA_BIDIRECTIONAL)) 2306 return 0; 2307 2308 /* If the DMA remap fails, one cause can be that we have 2309 * too many objects pinned in a small remapping table, 2310 * such as swiotlb. Incrementally purge all other objects and 2311 * try again - if there are no more pages to remove from 2312 * the DMA remapper, i915_gem_shrink will return 0. 2313 */ 2314 GEM_BUG_ON(obj->mm.pages == pages); 2315 } while (i915_gem_shrink(to_i915(obj->base.dev), 2316 obj->base.size >> PAGE_SHIFT, NULL, 2317 I915_SHRINK_BOUND | 2318 I915_SHRINK_UNBOUND | 2319 I915_SHRINK_ACTIVE)); 2320 2321 return -ENOSPC; 2322 } 2323 2324 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2325 { 2326 writeq(pte, addr); 2327 } 2328 2329 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2330 dma_addr_t addr, 2331 u64 offset, 2332 enum i915_cache_level level, 2333 u32 unused) 2334 { 2335 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2336 gen8_pte_t __iomem *pte = 2337 (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); 2338 2339 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2340 2341 ggtt->invalidate(vm->i915); 2342 } 2343 2344 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2345 struct i915_vma *vma, 2346 enum i915_cache_level level, 2347 u32 unused) 2348 { 2349 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2350 struct sgt_iter sgt_iter; 2351 gen8_pte_t __iomem *gtt_entries; 2352 const gen8_pte_t pte_encode = gen8_pte_encode(0, level); 2353 dma_addr_t addr; 2354 2355 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; 2356 gtt_entries += vma->node.start >> PAGE_SHIFT; 2357 for_each_sgt_dma(addr, sgt_iter, vma->pages) 2358 gen8_set_pte(gtt_entries++, pte_encode | addr); 2359 2360 wmb(); 2361 2362 /* This next bit makes the above posting read even more important. We 2363 * want to flush the TLBs only after we're certain all the PTE updates 2364 * have finished. 2365 */ 2366 ggtt->invalidate(vm->i915); 2367 } 2368 2369 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2370 dma_addr_t addr, 2371 u64 offset, 2372 enum i915_cache_level level, 2373 u32 flags) 2374 { 2375 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2376 gen6_pte_t __iomem *pte = 2377 (gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); 2378 2379 iowrite32(vm->pte_encode(addr, level, flags), pte); 2380 2381 ggtt->invalidate(vm->i915); 2382 } 2383 2384 /* 2385 * Binds an object into the global gtt with the specified cache level. The object 2386 * will be accessible to the GPU via commands whose operands reference offsets 2387 * within the global GTT as well as accessible by the GPU through the GMADR 2388 * mapped BAR (dev_priv->mm.gtt->gtt). 
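 *
 * Note that the function below simply streams one gen6_pte_t per 4 KiB page
 * through the mapping of the GSM and then relies on the wmb() plus GGTT
 * invalidate to make the new PTEs visible to the GPU.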
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma *vma,
                                     enum i915_cache_level level,
                                     u32 flags)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
        unsigned int i = vma->node.start >> PAGE_SHIFT;
        struct sgt_iter iter;
        dma_addr_t addr;
        for_each_sgt_dma(addr, iter, vma->pages)
                iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
        wmb();

        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
         * have finished.
         */
        ggtt->invalidate(vm->i915);
}

static void nop_clear_range(struct i915_address_space *vm,
                            u64 start, u64 length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        unsigned first_entry = start >> PAGE_SHIFT;
        unsigned num_entries = length >> PAGE_SHIFT;
        const gen8_pte_t scratch_pte =
                gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
        gen8_pte_t __iomem *gtt_base =
                (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
        int i;

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        for (i = 0; i < num_entries; i++)
                gen8_set_pte(&gtt_base[i], scratch_pte);
}

static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
{
        struct drm_i915_private *dev_priv = vm->i915;

        /*
         * Make sure the internal GAM fifo has been cleared of all GTT
         * writes before exiting stop_machine(). This guarantees that
         * any aperture accesses waiting to start in another process
         * cannot back up behind the GTT writes causing a hang.
         * The register can be any arbitrary GAM register.
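         *
         * The __BKL variants below reuse the ordinary gen8 insert/clear
         * callbacks, but run them under stop_machine() and finish with this
         * flush, so no other CPU can touch the aperture mid-update.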
         */
        POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

struct insert_page {
        struct i915_address_space *vm;
        dma_addr_t addr;
        u64 offset;
        enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
{
        struct insert_page *arg = _arg;

        gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
        bxt_vtd_ggtt_wa(arg->vm);

        return 0;
}

static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
                                          dma_addr_t addr,
                                          u64 offset,
                                          enum i915_cache_level level,
                                          u32 unused)
{
        struct insert_page arg = { vm, addr, offset, level };

        stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}

struct insert_entries {
        struct i915_address_space *vm;
        struct i915_vma *vma;
        enum i915_cache_level level;
};

static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
{
        struct insert_entries *arg = _arg;

        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
        bxt_vtd_ggtt_wa(arg->vm);

        return 0;
}

static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
                                             struct i915_vma *vma,
                                             enum i915_cache_level level,
                                             u32 unused)
{
        struct insert_entries arg = { vm, vma, level };

        stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}

struct clear_range {
        struct i915_address_space *vm;
        u64 start;
        u64 length;
};

static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
{
        struct clear_range *arg = _arg;

        gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
        bxt_vtd_ggtt_wa(arg->vm);

        return 0;
}

static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
                                          u64 start,
                                          u64 length)
{
        struct clear_range arg = { vm, start, length };

        stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
                                  u64 start, u64 length)
{
        struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
        unsigned first_entry = start >> PAGE_SHIFT;
        unsigned num_entries = length >> PAGE_SHIFT;
        gen6_pte_t scratch_pte, __iomem *gtt_base =
                (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
        const int max_entries = ggtt_total_entries(ggtt) - first_entry;
        int i;

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
                                     I915_CACHE_LLC, 0);

        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
}

static void i915_ggtt_insert_page(struct i915_address_space *vm,
                                  dma_addr_t addr,
                                  u64 offset,
                                  enum i915_cache_level cache_level,
                                  u32 unused)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

        intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct i915_vma *vma,
                                     enum i915_cache_level cache_level,
                                     u32 unused)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2572 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2573 2574 intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT, 2575 flags); 2576 } 2577 2578 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2579 u64 start, u64 length) 2580 { 2581 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2582 } 2583 2584 static int ggtt_bind_vma(struct i915_vma *vma, 2585 enum i915_cache_level cache_level, 2586 u32 flags) 2587 { 2588 struct drm_i915_private *i915 = vma->vm->i915; 2589 struct drm_i915_gem_object *obj = vma->obj; 2590 u32 pte_flags; 2591 2592 /* Currently applicable only to VLV */ 2593 pte_flags = 0; 2594 if (obj->gt_ro) 2595 pte_flags |= PTE_READ_ONLY; 2596 2597 intel_runtime_pm_get(i915); 2598 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); 2599 intel_runtime_pm_put(i915); 2600 2601 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; 2602 2603 /* 2604 * Without aliasing PPGTT there's no difference between 2605 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2606 * upgrade to both bound if we bind either to avoid double-binding. 2607 */ 2608 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2609 2610 return 0; 2611 } 2612 2613 static void ggtt_unbind_vma(struct i915_vma *vma) 2614 { 2615 struct drm_i915_private *i915 = vma->vm->i915; 2616 2617 intel_runtime_pm_get(i915); 2618 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 2619 intel_runtime_pm_put(i915); 2620 } 2621 2622 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2623 enum i915_cache_level cache_level, 2624 u32 flags) 2625 { 2626 struct drm_i915_private *i915 = vma->vm->i915; 2627 u32 pte_flags; 2628 int ret; 2629 2630 /* Currently applicable only to VLV */ 2631 pte_flags = 0; 2632 if (vma->obj->gt_ro) 2633 pte_flags |= PTE_READ_ONLY; 2634 2635 if (flags & I915_VMA_LOCAL_BIND) { 2636 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2637 2638 if (!(vma->flags & I915_VMA_LOCAL_BIND) && 2639 appgtt->base.allocate_va_range) { 2640 ret = appgtt->base.allocate_va_range(&appgtt->base, 2641 vma->node.start, 2642 vma->size); 2643 if (ret) 2644 return ret; 2645 } 2646 2647 appgtt->base.insert_entries(&appgtt->base, vma, cache_level, 2648 pte_flags); 2649 } 2650 2651 if (flags & I915_VMA_GLOBAL_BIND) { 2652 intel_runtime_pm_get(i915); 2653 vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); 2654 intel_runtime_pm_put(i915); 2655 } 2656 2657 return 0; 2658 } 2659 2660 static void aliasing_gtt_unbind_vma(struct i915_vma *vma) 2661 { 2662 struct drm_i915_private *i915 = vma->vm->i915; 2663 2664 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2665 intel_runtime_pm_get(i915); 2666 vma->vm->clear_range(vma->vm, vma->node.start, vma->size); 2667 intel_runtime_pm_put(i915); 2668 } 2669 2670 if (vma->flags & I915_VMA_LOCAL_BIND) { 2671 struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; 2672 2673 vm->clear_range(vm, vma->node.start, vma->size); 2674 } 2675 } 2676 2677 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2678 struct sg_table *pages) 2679 { 2680 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2681 struct device *kdev = &dev_priv->drm.pdev->dev; 2682 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2683 2684 if (unlikely(ggtt->do_idle_maps)) { 2685 if (i915_gem_wait_for_idle(dev_priv, 0)) { 2686 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2687 /* Wait a bit, in hopes it avoids the hang */ 2688 udelay(10); 2689 } 2690 } 2691 2692 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2693 } 
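
/*
 * Illustrative sketch (not an actual caller) of how the two bind paths above
 * split the work once an aliasing PPGTT has been installed:
 *
 *      aliasing_gtt_bind_vma(vma, cache_level, I915_VMA_LOCAL_BIND)
 *              -> appgtt->base.allocate_va_range()  (if not already locally bound)
 *              -> appgtt->base.insert_entries()     (writes the PPGTT PTEs)
 *      aliasing_gtt_bind_vma(vma, cache_level, I915_VMA_GLOBAL_BIND)
 *              -> vma->vm->insert_entries()         (writes the GGTT PTEs,
 *                                                    under runtime PM)
 *
 * Without an aliasing PPGTT, ggtt_bind_vma() treats both flag bits as one
 * and only the GGTT PTEs exist.
 */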
2694 2695 static int ggtt_set_pages(struct i915_vma *vma) 2696 { 2697 int ret; 2698 2699 GEM_BUG_ON(vma->pages); 2700 2701 ret = i915_get_ggtt_vma_pages(vma); 2702 if (ret) 2703 return ret; 2704 2705 vma->page_sizes = vma->obj->mm.page_sizes; 2706 2707 return 0; 2708 } 2709 2710 static void i915_gtt_color_adjust(const struct drm_mm_node *node, 2711 unsigned long color, 2712 u64 *start, 2713 u64 *end) 2714 { 2715 if (node->allocated && node->color != color) 2716 *start += I915_GTT_PAGE_SIZE; 2717 2718 /* Also leave a space between the unallocated reserved node after the 2719 * GTT and any objects within the GTT, i.e. we use the color adjustment 2720 * to insert a guard page to prevent prefetches crossing over the 2721 * GTT boundary. 2722 */ 2723 node = list_next_entry(node, node_list); 2724 if (node->color != color) 2725 *end -= I915_GTT_PAGE_SIZE; 2726 } 2727 2728 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) 2729 { 2730 struct i915_ggtt *ggtt = &i915->ggtt; 2731 struct i915_hw_ppgtt *ppgtt; 2732 int err; 2733 2734 ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); 2735 if (IS_ERR(ppgtt)) 2736 return PTR_ERR(ppgtt); 2737 2738 if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { 2739 err = -ENODEV; 2740 goto err_ppgtt; 2741 } 2742 2743 if (ppgtt->base.allocate_va_range) { 2744 /* Note we only pre-allocate as far as the end of the global 2745 * GTT. On 48b / 4-level page-tables, the difference is very, 2746 * very significant! We have to preallocate as GVT/vgpu does 2747 * not like the page directory disappearing. 2748 */ 2749 err = ppgtt->base.allocate_va_range(&ppgtt->base, 2750 0, ggtt->base.total); 2751 if (err) 2752 goto err_ppgtt; 2753 } 2754 2755 i915->mm.aliasing_ppgtt = ppgtt; 2756 2757 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2758 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2759 2760 WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); 2761 ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; 2762 2763 return 0; 2764 2765 err_ppgtt: 2766 i915_ppgtt_put(ppgtt); 2767 return err; 2768 } 2769 2770 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) 2771 { 2772 struct i915_ggtt *ggtt = &i915->ggtt; 2773 struct i915_hw_ppgtt *ppgtt; 2774 2775 ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); 2776 if (!ppgtt) 2777 return; 2778 2779 i915_ppgtt_put(ppgtt); 2780 2781 ggtt->base.bind_vma = ggtt_bind_vma; 2782 ggtt->base.unbind_vma = ggtt_unbind_vma; 2783 } 2784 2785 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2786 { 2787 /* Let GEM Manage all of the aperture. 2788 * 2789 * However, leave one page at the end still bound to the scratch page. 2790 * There are a number of places where the hardware apparently prefetches 2791 * past the end of the object, and we've seen multiple hangs with the 2792 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2793 * aperture. One page should be enough to keep any prefetching inside 2794 * of the aperture. 
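         *
         * The guard is enforced in two ways below: the final page of the
         * GGTT is explicitly cleared back to the scratch page, and, where
         * i915_gtt_color_adjust() has been installed, allocations that sit
         * next to differently coloured nodes are shrunk by an extra
         * I915_GTT_PAGE_SIZE.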
2795 */ 2796 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2797 unsigned long hole_start, hole_end; 2798 struct drm_mm_node *entry; 2799 int ret; 2800 2801 ret = intel_vgt_balloon(dev_priv); 2802 if (ret) 2803 return ret; 2804 2805 /* Reserve a mappable slot for our lockless error capture */ 2806 ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture, 2807 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 2808 0, ggtt->mappable_end, 2809 DRM_MM_INSERT_LOW); 2810 if (ret) 2811 return ret; 2812 2813 /* Clear any non-preallocated blocks */ 2814 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2815 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2816 hole_start, hole_end); 2817 ggtt->base.clear_range(&ggtt->base, hole_start, 2818 hole_end - hole_start); 2819 } 2820 2821 /* And finally clear the reserved guard page */ 2822 ggtt->base.clear_range(&ggtt->base, 2823 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2824 2825 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2826 ret = i915_gem_init_aliasing_ppgtt(dev_priv); 2827 if (ret) 2828 goto err; 2829 } 2830 2831 return 0; 2832 2833 err: 2834 drm_mm_remove_node(&ggtt->error_capture); 2835 return ret; 2836 } 2837 2838 /** 2839 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2840 * @dev_priv: i915 device 2841 */ 2842 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2843 { 2844 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2845 struct i915_vma *vma, *vn; 2846 struct pagevec *pvec; 2847 2848 ggtt->base.closed = true; 2849 2850 mutex_lock(&dev_priv->drm.struct_mutex); 2851 WARN_ON(!list_empty(&ggtt->base.active_list)); 2852 list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) 2853 WARN_ON(i915_vma_unbind(vma)); 2854 mutex_unlock(&dev_priv->drm.struct_mutex); 2855 2856 i915_gem_cleanup_stolen(&dev_priv->drm); 2857 2858 mutex_lock(&dev_priv->drm.struct_mutex); 2859 i915_gem_fini_aliasing_ppgtt(dev_priv); 2860 2861 if (drm_mm_node_allocated(&ggtt->error_capture)) 2862 drm_mm_remove_node(&ggtt->error_capture); 2863 2864 if (drm_mm_initialized(&ggtt->base.mm)) { 2865 intel_vgt_deballoon(dev_priv); 2866 i915_address_space_fini(&ggtt->base); 2867 } 2868 2869 ggtt->base.cleanup(&ggtt->base); 2870 2871 pvec = &dev_priv->mm.wc_stash; 2872 if (pvec->nr) { 2873 set_pages_array_wb(pvec->pages, pvec->nr); 2874 __pagevec_release(pvec); 2875 } 2876 2877 mutex_unlock(&dev_priv->drm.struct_mutex); 2878 2879 arch_phys_wc_del(ggtt->mtrr); 2880 io_mapping_fini(&ggtt->mappable); 2881 } 2882 2883 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2884 { 2885 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2886 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2887 return snb_gmch_ctl << 20; 2888 } 2889 2890 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2891 { 2892 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2893 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2894 if (bdw_gmch_ctl) 2895 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2896 2897 #ifdef CONFIG_X86_32 2898 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2899 if (bdw_gmch_ctl > 4) 2900 bdw_gmch_ctl = 4; 2901 #endif 2902 2903 return bdw_gmch_ctl << 20; 2904 } 2905 2906 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2907 { 2908 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2909 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2910 2911 if (gmch_ctrl) 2912 return 1 << (20 + gmch_ctrl); 2913 2914 return 0; 2915 } 2916 2917 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2918 { 2919 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2920 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 
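        /*
         * Worked example: the GMS field counts stolen memory in 32 MiB units
         * here, so a raw value of 2 decodes to 2 << 25 bytes = 64 MiB.
         */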
2921 return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ 2922 } 2923 2924 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2925 { 2926 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2927 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2928 return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ 2929 } 2930 2931 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2932 { 2933 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2934 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2935 2936 /* 2937 * 0x0 to 0x10: 32MB increments starting at 0MB 2938 * 0x11 to 0x16: 4MB increments starting at 8MB 2939 * 0x17 to 0x1d: 4MB increments start at 36MB 2940 */ 2941 if (gmch_ctrl < 0x11) 2942 return (size_t)gmch_ctrl << 25; 2943 else if (gmch_ctrl < 0x17) 2944 return (size_t)(gmch_ctrl - 0x11 + 2) << 22; 2945 else 2946 return (size_t)(gmch_ctrl - 0x17 + 9) << 22; 2947 } 2948 2949 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2950 { 2951 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2952 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2953 2954 if (gen9_gmch_ctl < 0xf0) 2955 return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ 2956 else 2957 /* 4MB increments starting at 0xf0 for 4MB */ 2958 return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; 2959 } 2960 2961 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2962 { 2963 struct drm_i915_private *dev_priv = ggtt->base.i915; 2964 struct pci_dev *pdev = dev_priv->drm.pdev; 2965 phys_addr_t phys_addr; 2966 int ret; 2967 2968 /* For Modern GENs the PTEs and register space are split in the BAR */ 2969 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2970 2971 /* 2972 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range 2973 * will be dropped. For WC mappings in general we have 64 byte burst 2974 * writes when the WC buffer is flushed, so we can't use it, but have to 2975 * resort to an uncached mapping. The WC issue is easily caught by the 2976 * readback check when writing GTT PTE entries. 
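         *
         * That is why the code below picks ioremap_nocache() for GEN9 LP and
         * GEN10+ parts and sticks with ioremap_wc() everywhere else; the GSM
         * itself lives in the upper half of BAR 0, as set up above.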
2977 */ 2978 if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10) 2979 ggtt->gsm = ioremap_nocache(phys_addr, size); 2980 else 2981 ggtt->gsm = ioremap_wc(phys_addr, size); 2982 if (!ggtt->gsm) { 2983 DRM_ERROR("Failed to map the ggtt page table\n"); 2984 return -ENOMEM; 2985 } 2986 2987 ret = setup_scratch_page(&ggtt->base, GFP_DMA32); 2988 if (ret) { 2989 DRM_ERROR("Scratch setup failed\n"); 2990 /* iounmap will also get called at remove, but meh */ 2991 iounmap(ggtt->gsm); 2992 return ret; 2993 } 2994 2995 return 0; 2996 } 2997 2998 static struct intel_ppat_entry * 2999 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value) 3000 { 3001 struct intel_ppat_entry *entry = &ppat->entries[index]; 3002 3003 GEM_BUG_ON(index >= ppat->max_entries); 3004 GEM_BUG_ON(test_bit(index, ppat->used)); 3005 3006 entry->ppat = ppat; 3007 entry->value = value; 3008 kref_init(&entry->ref); 3009 set_bit(index, ppat->used); 3010 set_bit(index, ppat->dirty); 3011 3012 return entry; 3013 } 3014 3015 static void __free_ppat_entry(struct intel_ppat_entry *entry) 3016 { 3017 struct intel_ppat *ppat = entry->ppat; 3018 unsigned int index = entry - ppat->entries; 3019 3020 GEM_BUG_ON(index >= ppat->max_entries); 3021 GEM_BUG_ON(!test_bit(index, ppat->used)); 3022 3023 entry->value = ppat->clear_value; 3024 clear_bit(index, ppat->used); 3025 set_bit(index, ppat->dirty); 3026 } 3027 3028 /** 3029 * intel_ppat_get - get a usable PPAT entry 3030 * @i915: i915 device instance 3031 * @value: the PPAT value required by the caller 3032 * 3033 * The function tries to search if there is an existing PPAT entry which 3034 * matches with the required value. If perfectly matched, the existing PPAT 3035 * entry will be used. If only partially matched, it will try to check if 3036 * there is any available PPAT index. If yes, it will allocate a new PPAT 3037 * index for the required entry and update the HW. If not, the partially 3038 * matched entry will be used. 3039 */ 3040 const struct intel_ppat_entry * 3041 intel_ppat_get(struct drm_i915_private *i915, u8 value) 3042 { 3043 struct intel_ppat *ppat = &i915->ppat; 3044 struct intel_ppat_entry *entry; 3045 unsigned int scanned, best_score; 3046 int i; 3047 3048 GEM_BUG_ON(!ppat->max_entries); 3049 3050 scanned = best_score = 0; 3051 for_each_set_bit(i, ppat->used, ppat->max_entries) { 3052 unsigned int score; 3053 3054 score = ppat->match(ppat->entries[i].value, value); 3055 if (score > best_score) { 3056 entry = &ppat->entries[i]; 3057 if (score == INTEL_PPAT_PERFECT_MATCH) { 3058 kref_get(&entry->ref); 3059 return entry; 3060 } 3061 best_score = score; 3062 } 3063 scanned++; 3064 } 3065 3066 if (scanned == ppat->max_entries) { 3067 if (!best_score) 3068 return ERR_PTR(-ENOSPC); 3069 3070 kref_get(&entry->ref); 3071 return entry; 3072 } 3073 3074 i = find_first_zero_bit(ppat->used, ppat->max_entries); 3075 entry = __alloc_ppat_entry(ppat, i, value); 3076 ppat->update_hw(i915); 3077 return entry; 3078 } 3079 3080 static void release_ppat(struct kref *kref) 3081 { 3082 struct intel_ppat_entry *entry = 3083 container_of(kref, struct intel_ppat_entry, ref); 3084 struct drm_i915_private *i915 = entry->ppat->i915; 3085 3086 __free_ppat_entry(entry); 3087 entry->ppat->update_hw(i915); 3088 } 3089 3090 /** 3091 * intel_ppat_put - put back the PPAT entry got from intel_ppat_get() 3092 * @entry: an intel PPAT entry 3093 * 3094 * Put back the PPAT entry got from intel_ppat_get(). 
If the PPAT index of the
 * entry is dynamically allocated, its reference count will be decreased. Once
 * the reference count reaches zero, the PPAT index becomes free again.
 */
void intel_ppat_put(const struct intel_ppat_entry *entry)
{
        struct intel_ppat *ppat = entry->ppat;
        unsigned int index = entry - ppat->entries;

        GEM_BUG_ON(!ppat->max_entries);

        kref_put(&ppat->entries[index].ref, release_ppat);
}

static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
{
        struct intel_ppat *ppat = &dev_priv->ppat;
        int i;

        for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
                I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
                clear_bit(i, ppat->dirty);
        }
}

static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
{
        struct intel_ppat *ppat = &dev_priv->ppat;
        u64 pat = 0;
        int i;

        for (i = 0; i < ppat->max_entries; i++)
                pat |= GEN8_PPAT(i, ppat->entries[i].value);

        bitmap_clear(ppat->dirty, 0, ppat->max_entries);

        I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
        I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static unsigned int bdw_private_pat_match(u8 src, u8 dst)
{
        unsigned int score = 0;
        enum {
                AGE_MATCH = BIT(0),
                TC_MATCH = BIT(1),
                CA_MATCH = BIT(2),
        };

        /* Cache attribute has to be matched. */
        if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
                return 0;

        score |= CA_MATCH;

        if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
                score |= TC_MATCH;

        if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
                score |= AGE_MATCH;

        if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
                return INTEL_PPAT_PERFECT_MATCH;

        return score;
}

static unsigned int chv_private_pat_match(u8 src, u8 dst)
{
        return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
                INTEL_PPAT_PERFECT_MATCH : 0;
}

static void cnl_setup_private_ppat(struct intel_ppat *ppat)
{
        ppat->max_entries = 8;
        ppat->update_hw = cnl_private_pat_update_hw;
        ppat->match = bdw_private_pat_match;
        ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);

        /* XXX: spec is unclear if this is still needed for CNL+ */
        if (!USES_PPGTT(ppat->i915)) {
                __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
                return;
        }

        __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
        __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
        __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
        __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
        __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
        __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
        __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
        __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
*/ 3193 static void bdw_setup_private_ppat(struct intel_ppat *ppat) 3194 { 3195 ppat->max_entries = 8; 3196 ppat->update_hw = bdw_private_pat_update_hw; 3197 ppat->match = bdw_private_pat_match; 3198 ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3); 3199 3200 if (!USES_PPGTT(ppat->i915)) { 3201 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 3202 * so RTL will always use the value corresponding to 3203 * pat_sel = 000". 3204 * So let's disable cache for GGTT to avoid screen corruptions. 3205 * MOCS still can be used though. 3206 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 3207 * before this patch, i.e. the same uncached + snooping access 3208 * like on gen6/7 seems to be in effect. 3209 * - So this just fixes blitter/render access. Again it looks 3210 * like it's not just uncached access, but uncached + snooping. 3211 * So we can still hold onto all our assumptions wrt cpu 3212 * clflushing on LLC machines. 3213 */ 3214 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC); 3215 return; 3216 } 3217 3218 __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */ 3219 __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */ 3220 __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */ 3221 __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */ 3222 __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); 3223 __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); 3224 __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); 3225 __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 3226 } 3227 3228 static void chv_setup_private_ppat(struct intel_ppat *ppat) 3229 { 3230 ppat->max_entries = 8; 3231 ppat->update_hw = bdw_private_pat_update_hw; 3232 ppat->match = chv_private_pat_match; 3233 ppat->clear_value = CHV_PPAT_SNOOP; 3234 3235 /* 3236 * Map WB on BDW to snooped on CHV. 3237 * 3238 * Only the snoop bit has meaning for CHV, the rest is 3239 * ignored. 3240 * 3241 * The hardware will never snoop for certain types of accesses: 3242 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3243 * - PPGTT page tables 3244 * - some other special cycles 3245 * 3246 * As with BDW, we also need to consider the following for GT accesses: 3247 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3248 * so RTL will always use the value corresponding to 3249 * pat_sel = 000". 3250 * Which means we must set the snoop bit in PAT entry 0 3251 * in order to keep the global status page working. 
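         *
         * In other words, the table below mirrors the BDW layout index for
         * index: the WB slots (0 and 4-7) become snooped, while the WC/WT/UC
         * slots (1-3) are left unsnooped.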
3252 */ 3253 3254 __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP); 3255 __alloc_ppat_entry(ppat, 1, 0); 3256 __alloc_ppat_entry(ppat, 2, 0); 3257 __alloc_ppat_entry(ppat, 3, 0); 3258 __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP); 3259 __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP); 3260 __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP); 3261 __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP); 3262 } 3263 3264 static void gen6_gmch_remove(struct i915_address_space *vm) 3265 { 3266 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3267 3268 iounmap(ggtt->gsm); 3269 cleanup_scratch_page(vm); 3270 } 3271 3272 static void setup_private_pat(struct drm_i915_private *dev_priv) 3273 { 3274 struct intel_ppat *ppat = &dev_priv->ppat; 3275 int i; 3276 3277 ppat->i915 = dev_priv; 3278 3279 if (INTEL_GEN(dev_priv) >= 10) 3280 cnl_setup_private_ppat(ppat); 3281 else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) 3282 chv_setup_private_ppat(ppat); 3283 else 3284 bdw_setup_private_ppat(ppat); 3285 3286 GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES); 3287 3288 for_each_clear_bit(i, ppat->used, ppat->max_entries) { 3289 ppat->entries[i].value = ppat->clear_value; 3290 ppat->entries[i].ppat = ppat; 3291 set_bit(i, ppat->dirty); 3292 } 3293 3294 ppat->update_hw(dev_priv); 3295 } 3296 3297 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3298 { 3299 struct drm_i915_private *dev_priv = ggtt->base.i915; 3300 struct pci_dev *pdev = dev_priv->drm.pdev; 3301 unsigned int size; 3302 u16 snb_gmch_ctl; 3303 int err; 3304 3305 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3306 ggtt->mappable_base = pci_resource_start(pdev, 2); 3307 ggtt->mappable_end = pci_resource_len(pdev, 2); 3308 3309 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); 3310 if (!err) 3311 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3312 if (err) 3313 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); 3314 3315 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3316 3317 if (INTEL_GEN(dev_priv) >= 9) { 3318 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3319 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3320 } else if (IS_CHERRYVIEW(dev_priv)) { 3321 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3322 size = chv_get_total_gtt_size(snb_gmch_ctl); 3323 } else { 3324 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3325 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3326 } 3327 3328 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3329 ggtt->base.cleanup = gen6_gmch_remove; 3330 ggtt->base.bind_vma = ggtt_bind_vma; 3331 ggtt->base.unbind_vma = ggtt_unbind_vma; 3332 ggtt->base.set_pages = ggtt_set_pages; 3333 ggtt->base.clear_pages = clear_pages; 3334 ggtt->base.insert_page = gen8_ggtt_insert_page; 3335 ggtt->base.clear_range = nop_clear_range; 3336 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3337 ggtt->base.clear_range = gen8_ggtt_clear_range; 3338 3339 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3340 3341 /* Serialize GTT updates with aperture access on BXT if VT-d is on. 
*/ 3342 if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { 3343 ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; 3344 ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL; 3345 if (ggtt->base.clear_range != nop_clear_range) 3346 ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL; 3347 } 3348 3349 ggtt->invalidate = gen6_ggtt_invalidate; 3350 3351 setup_private_pat(dev_priv); 3352 3353 return ggtt_probe_common(ggtt, size); 3354 } 3355 3356 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3357 { 3358 struct drm_i915_private *dev_priv = ggtt->base.i915; 3359 struct pci_dev *pdev = dev_priv->drm.pdev; 3360 unsigned int size; 3361 u16 snb_gmch_ctl; 3362 int err; 3363 3364 ggtt->mappable_base = pci_resource_start(pdev, 2); 3365 ggtt->mappable_end = pci_resource_len(pdev, 2); 3366 3367 /* 64/512MB is the current min/max we actually know of, but this is just 3368 * a coarse sanity check. 3369 */ 3370 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3371 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3372 return -ENXIO; 3373 } 3374 3375 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); 3376 if (!err) 3377 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3378 if (err) 3379 DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); 3380 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3381 3382 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3383 3384 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3385 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3386 3387 ggtt->base.clear_range = gen6_ggtt_clear_range; 3388 ggtt->base.insert_page = gen6_ggtt_insert_page; 3389 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3390 ggtt->base.bind_vma = ggtt_bind_vma; 3391 ggtt->base.unbind_vma = ggtt_unbind_vma; 3392 ggtt->base.set_pages = ggtt_set_pages; 3393 ggtt->base.clear_pages = clear_pages; 3394 ggtt->base.cleanup = gen6_gmch_remove; 3395 3396 ggtt->invalidate = gen6_ggtt_invalidate; 3397 3398 if (HAS_EDRAM(dev_priv)) 3399 ggtt->base.pte_encode = iris_pte_encode; 3400 else if (IS_HASWELL(dev_priv)) 3401 ggtt->base.pte_encode = hsw_pte_encode; 3402 else if (IS_VALLEYVIEW(dev_priv)) 3403 ggtt->base.pte_encode = byt_pte_encode; 3404 else if (INTEL_GEN(dev_priv) >= 7) 3405 ggtt->base.pte_encode = ivb_pte_encode; 3406 else 3407 ggtt->base.pte_encode = snb_pte_encode; 3408 3409 return ggtt_probe_common(ggtt, size); 3410 } 3411 3412 static void i915_gmch_remove(struct i915_address_space *vm) 3413 { 3414 intel_gmch_remove(); 3415 } 3416 3417 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3418 { 3419 struct drm_i915_private *dev_priv = ggtt->base.i915; 3420 int ret; 3421 3422 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3423 if (!ret) { 3424 DRM_ERROR("failed to set up gmch\n"); 3425 return -EIO; 3426 } 3427 3428 intel_gtt_get(&ggtt->base.total, 3429 &ggtt->stolen_size, 3430 &ggtt->mappable_base, 3431 &ggtt->mappable_end); 3432 3433 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3434 ggtt->base.insert_page = i915_ggtt_insert_page; 3435 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3436 ggtt->base.clear_range = i915_ggtt_clear_range; 3437 ggtt->base.bind_vma = ggtt_bind_vma; 3438 ggtt->base.unbind_vma = ggtt_unbind_vma; 3439 ggtt->base.set_pages = ggtt_set_pages; 3440 ggtt->base.clear_pages = clear_pages; 3441 ggtt->base.cleanup = i915_gmch_remove; 3442 3443 ggtt->invalidate = gmch_ggtt_invalidate; 3444 3445 if (unlikely(ggtt->do_idle_maps)) 3446 DRM_INFO("applying Ironlake quirks 
for intel_iommu\n"); 3447 3448 return 0; 3449 } 3450 3451 /** 3452 * i915_ggtt_probe_hw - Probe GGTT hardware location 3453 * @dev_priv: i915 device 3454 */ 3455 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3456 { 3457 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3458 int ret; 3459 3460 ggtt->base.i915 = dev_priv; 3461 ggtt->base.dma = &dev_priv->drm.pdev->dev; 3462 3463 if (INTEL_GEN(dev_priv) <= 5) 3464 ret = i915_gmch_probe(ggtt); 3465 else if (INTEL_GEN(dev_priv) < 8) 3466 ret = gen6_gmch_probe(ggtt); 3467 else 3468 ret = gen8_gmch_probe(ggtt); 3469 if (ret) 3470 return ret; 3471 3472 /* Trim the GGTT to fit the GuC mappable upper range (when enabled). 3473 * This is easier than doing range restriction on the fly, as we 3474 * currently don't have any bits spare to pass in this upper 3475 * restriction! 3476 */ 3477 if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) { 3478 ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); 3479 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3480 } 3481 3482 if ((ggtt->base.total - 1) >> 32) { 3483 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3484 " of address space! Found %lldM!\n", 3485 ggtt->base.total >> 20); 3486 ggtt->base.total = 1ULL << 32; 3487 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3488 } 3489 3490 if (ggtt->mappable_end > ggtt->base.total) { 3491 DRM_ERROR("mappable aperture extends past end of GGTT," 3492 " aperture=%llx, total=%llx\n", 3493 ggtt->mappable_end, ggtt->base.total); 3494 ggtt->mappable_end = ggtt->base.total; 3495 } 3496 3497 /* GMADR is the PCI mmio aperture into the global GTT. */ 3498 DRM_INFO("Memory usable by graphics device = %lluM\n", 3499 ggtt->base.total >> 20); 3500 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3501 DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); 3502 if (intel_vtd_active()) 3503 DRM_INFO("VT-d active for gfx access\n"); 3504 3505 return 0; 3506 } 3507 3508 /** 3509 * i915_ggtt_init_hw - Initialize GGTT hardware 3510 * @dev_priv: i915 device 3511 */ 3512 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3513 { 3514 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3515 int ret; 3516 3517 INIT_LIST_HEAD(&dev_priv->vm_list); 3518 3519 /* Note that we use page colouring to enforce a guard page at the 3520 * end of the address space. This is required as the CS may prefetch 3521 * beyond the end of the batch buffer, across the page boundary, 3522 * and beyond the end of the GTT if we do not provide a guard. 3523 */ 3524 mutex_lock(&dev_priv->drm.struct_mutex); 3525 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 3526 if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) 3527 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3528 mutex_unlock(&dev_priv->drm.struct_mutex); 3529 3530 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3531 dev_priv->ggtt.mappable_base, 3532 dev_priv->ggtt.mappable_end)) { 3533 ret = -EIO; 3534 goto out_gtt_cleanup; 3535 } 3536 3537 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3538 3539 /* 3540 * Initialise stolen early so that we may reserve preallocated 3541 * objects for the BIOS to KMS transition. 
3542 */ 3543 ret = i915_gem_init_stolen(dev_priv); 3544 if (ret) 3545 goto out_gtt_cleanup; 3546 3547 return 0; 3548 3549 out_gtt_cleanup: 3550 ggtt->base.cleanup(&ggtt->base); 3551 return ret; 3552 } 3553 3554 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3555 { 3556 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3557 return -EIO; 3558 3559 return 0; 3560 } 3561 3562 void i915_ggtt_enable_guc(struct drm_i915_private *i915) 3563 { 3564 GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); 3565 3566 i915->ggtt.invalidate = guc_ggtt_invalidate; 3567 } 3568 3569 void i915_ggtt_disable_guc(struct drm_i915_private *i915) 3570 { 3571 /* We should only be called after i915_ggtt_enable_guc() */ 3572 GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); 3573 3574 i915->ggtt.invalidate = gen6_ggtt_invalidate; 3575 } 3576 3577 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) 3578 { 3579 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3580 struct drm_i915_gem_object *obj, *on; 3581 3582 i915_check_and_clear_faults(dev_priv); 3583 3584 /* First fill our portion of the GTT with scratch pages */ 3585 ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); 3586 3587 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3588 3589 /* clflush objects bound into the GGTT and rebind them. */ 3590 list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) { 3591 bool ggtt_bound = false; 3592 struct i915_vma *vma; 3593 3594 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3595 if (vma->vm != &ggtt->base) 3596 continue; 3597 3598 if (!i915_vma_unbind(vma)) 3599 continue; 3600 3601 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3602 PIN_UPDATE)); 3603 ggtt_bound = true; 3604 } 3605 3606 if (ggtt_bound) 3607 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3608 } 3609 3610 ggtt->base.closed = false; 3611 3612 if (INTEL_GEN(dev_priv) >= 8) { 3613 struct intel_ppat *ppat = &dev_priv->ppat; 3614 3615 bitmap_set(ppat->dirty, 0, ppat->max_entries); 3616 dev_priv->ppat.update_hw(dev_priv); 3617 return; 3618 } 3619 3620 if (USES_PPGTT(dev_priv)) { 3621 struct i915_address_space *vm; 3622 3623 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3624 struct i915_hw_ppgtt *ppgtt; 3625 3626 if (i915_is_ggtt(vm)) 3627 ppgtt = dev_priv->mm.aliasing_ppgtt; 3628 else 3629 ppgtt = i915_vm_to_ppgtt(vm); 3630 3631 gen6_write_page_range(ppgtt, 0, ppgtt->base.total); 3632 } 3633 } 3634 3635 i915_ggtt_invalidate(dev_priv); 3636 } 3637 3638 static struct scatterlist * 3639 rotate_pages(const dma_addr_t *in, unsigned int offset, 3640 unsigned int width, unsigned int height, 3641 unsigned int stride, 3642 struct sg_table *st, struct scatterlist *sg) 3643 { 3644 unsigned int column, row; 3645 unsigned int src_idx; 3646 3647 for (column = 0; column < width; column++) { 3648 src_idx = stride * (height - 1) + column; 3649 for (row = 0; row < height; row++) { 3650 st->nents++; 3651 /* We don't need the pages, but need to initialize 3652 * the entries so the sg list can be happily traversed. 3653 * The only thing we need are DMA addresses. 
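                         *
                         * As a small worked example, a 2x2 plane with stride 2
                         * is emitted column by column, bottom row first:
                         * source pages 2, 0, then 3, 1.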
3654 */ 3655 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3656 sg_dma_address(sg) = in[offset + src_idx]; 3657 sg_dma_len(sg) = PAGE_SIZE; 3658 sg = sg_next(sg); 3659 src_idx -= stride; 3660 } 3661 } 3662 3663 return sg; 3664 } 3665 3666 static noinline struct sg_table * 3667 intel_rotate_pages(struct intel_rotation_info *rot_info, 3668 struct drm_i915_gem_object *obj) 3669 { 3670 const unsigned long n_pages = obj->base.size / PAGE_SIZE; 3671 unsigned int size = intel_rotation_info_size(rot_info); 3672 struct sgt_iter sgt_iter; 3673 dma_addr_t dma_addr; 3674 unsigned long i; 3675 dma_addr_t *page_addr_list; 3676 struct sg_table *st; 3677 struct scatterlist *sg; 3678 int ret = -ENOMEM; 3679 3680 /* Allocate a temporary list of source pages for random access. */ 3681 page_addr_list = kvmalloc_array(n_pages, 3682 sizeof(dma_addr_t), 3683 GFP_KERNEL); 3684 if (!page_addr_list) 3685 return ERR_PTR(ret); 3686 3687 /* Allocate target SG list. */ 3688 st = kmalloc(sizeof(*st), GFP_KERNEL); 3689 if (!st) 3690 goto err_st_alloc; 3691 3692 ret = sg_alloc_table(st, size, GFP_KERNEL); 3693 if (ret) 3694 goto err_sg_alloc; 3695 3696 /* Populate source page list from the object. */ 3697 i = 0; 3698 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3699 page_addr_list[i++] = dma_addr; 3700 3701 GEM_BUG_ON(i != n_pages); 3702 st->nents = 0; 3703 sg = st->sgl; 3704 3705 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3706 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3707 rot_info->plane[i].width, rot_info->plane[i].height, 3708 rot_info->plane[i].stride, st, sg); 3709 } 3710 3711 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3712 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3713 3714 kvfree(page_addr_list); 3715 3716 return st; 3717 3718 err_sg_alloc: 3719 kfree(st); 3720 err_st_alloc: 3721 kvfree(page_addr_list); 3722 3723 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3724 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3725 3726 return ERR_PTR(ret); 3727 } 3728 3729 static noinline struct sg_table * 3730 intel_partial_pages(const struct i915_ggtt_view *view, 3731 struct drm_i915_gem_object *obj) 3732 { 3733 struct sg_table *st; 3734 struct scatterlist *sg, *iter; 3735 unsigned int count = view->partial.size; 3736 unsigned int offset; 3737 int ret = -ENOMEM; 3738 3739 st = kmalloc(sizeof(*st), GFP_KERNEL); 3740 if (!st) 3741 goto err_st_alloc; 3742 3743 ret = sg_alloc_table(st, count, GFP_KERNEL); 3744 if (ret) 3745 goto err_sg_alloc; 3746 3747 iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset); 3748 GEM_BUG_ON(!iter); 3749 3750 sg = st->sgl; 3751 st->nents = 0; 3752 do { 3753 unsigned int len; 3754 3755 len = min(iter->length - (offset << PAGE_SHIFT), 3756 count << PAGE_SHIFT); 3757 sg_set_page(sg, NULL, len, 0); 3758 sg_dma_address(sg) = 3759 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3760 sg_dma_len(sg) = len; 3761 3762 st->nents++; 3763 count -= len >> PAGE_SHIFT; 3764 if (count == 0) { 3765 sg_mark_end(sg); 3766 return st; 3767 } 3768 3769 sg = __sg_next(sg); 3770 iter = __sg_next(iter); 3771 offset = 0; 3772 } while (1); 3773 3774 err_sg_alloc: 3775 kfree(st); 3776 err_st_alloc: 3777 return ERR_PTR(ret); 3778 } 3779 3780 static int 3781 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3782 { 3783 int ret; 3784 3785 /* The vma->pages are only valid within the lifespan of the borrowed 3786 * obj->mm.pages. 
When the obj->mm.pages sg_table is regenerated, so
         * must be the vma->pages. A simple rule is that vma->pages must only
         * be accessed when the obj->mm.pages are pinned.
         */
        GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

        switch (vma->ggtt_view.type) {
        case I915_GGTT_VIEW_NORMAL:
                vma->pages = vma->obj->mm.pages;
                return 0;

        case I915_GGTT_VIEW_ROTATED:
                vma->pages =
                        intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
                break;

        case I915_GGTT_VIEW_PARTIAL:
                vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
                break;

        default:
                WARN_ONCE(1, "GGTT view %u not implemented!\n",
                          vma->ggtt_view.type);
                return -EINVAL;
        }

        ret = 0;
        if (unlikely(IS_ERR(vma->pages))) {
                ret = PTR_ERR(vma->pages);
                vma->pages = NULL;
                DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
                          vma->ggtt_view.type, ret);
        }
        return ret;
}

/**
 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @offset: where to insert inside the GTT,
 *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
 *          (@offset + @size) must fit within the address space
 * @color: color to apply to node, if this node is not from a VMA,
 *         color must be #I915_COLOR_UNEVICTABLE
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
 * the address space (using @size and @color). If the @node does not fit, it
 * tries to evict any overlapping nodes from the GTT, including any
 * neighbouring nodes if the colors do not match (to ensure guard pages between
 * differing domains). See i915_gem_evict_for_node() for the gory details
 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
 * evicting active overlapping objects, and any overlapping node that is pinned
 * or marked as unevictable will also result in failure.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
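 *
 * One in-tree user is the random-placement fallback in i915_gem_gtt_insert()
 * below, which picks a suitably aligned offset at random and asks this
 * function to carve the node out at exactly that spot.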
 */
int i915_gem_gtt_reserve(struct i915_address_space *vm,
                         struct drm_mm_node *node,
                         u64 size, u64 offset, unsigned long color,
                         unsigned int flags)
{
        int err;

        GEM_BUG_ON(!size);
        GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
        GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
        GEM_BUG_ON(range_overflows(offset, size, vm->total));
        GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
        GEM_BUG_ON(drm_mm_node_allocated(node));

        node->size = size;
        node->start = offset;
        node->color = color;

        err = drm_mm_reserve_node(&vm->mm, node);
        if (err != -ENOSPC)
                return err;

        if (flags & PIN_NOEVICT)
                return -ENOSPC;

        err = i915_gem_evict_for_node(vm, node, flags);
        if (err == 0)
                err = drm_mm_reserve_node(&vm->mm, node);

        return err;
}

static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
        u64 range, addr;

        GEM_BUG_ON(range_overflows(start, len, end));
        GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));

        range = round_down(end - len, align) - round_up(start, align);
        if (range) {
                if (sizeof(unsigned long) == sizeof(u64)) {
                        addr = get_random_long();
                } else {
                        addr = get_random_int();
                        if (range > U32_MAX) {
                                addr <<= 32;
                                addr |= get_random_int();
                        }
                }
                div64_u64_rem(addr, range, &addr);
                start += addr;
        }

        return round_up(start, align);
}

/**
 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @alignment: required alignment of starting offset, may be 0 but
 *             if specified, this must be a power-of-two and at least
 *             #I915_GTT_MIN_ALIGNMENT
 * @color: color to apply to node
 * @start: start of any range restriction inside GTT (0 for all),
 *         must be #I915_GTT_PAGE_SIZE aligned
 * @end: end of any range restriction inside GTT (U64_MAX for all),
 *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_insert() first searches for an available hole into which
 * it can insert the node. The hole address is aligned to @alignment and
 * its @size must then fit entirely within the [@start, @end] bounds. The
 * nodes on either side of the hole must match @color, or else a guard page
 * will be inserted between the two nodes (or the node evicted). If no
 * suitable hole is found, first a victim is randomly selected and tested
 * for eviction, and if that also fails, the LRU list of objects within the
 * GTT is scanned to find the first set of replacement nodes to create the
 * hole. Those old overlapping nodes are evicted from the GTT (and so must be
 * rebound before any future use). Any node that is currently pinned cannot
 * be evicted (see i915_vma_pin()). Similarly, if the node's VMA is currently
 * active and #PIN_NONBLOCK is specified, that node is also skipped when
 * searching for an eviction candidate. See i915_gem_evict_something() for
 * the gory details on the eviction algorithm.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
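 *
 * Roughly speaking, the implementation below proceeds in three stages:
 * a plain drm_mm_insert_node_in_range() search (biased HIGH, LOW or BEST
 * depending on the PIN flags), then a single randomly chosen placement via
 * i915_gem_gtt_reserve(), and finally a full eviction scan with
 * i915_gem_evict_something() followed by DRM_MM_INSERT_EVICT.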
 */
int i915_gem_gtt_insert(struct i915_address_space *vm,
			struct drm_mm_node *node,
			u64 size, u64 alignment, unsigned long color,
			u64 start, u64 end, unsigned int flags)
{
	enum drm_mm_insert_mode mode;
	u64 offset;
	int err;

	lockdep_assert_held(&vm->i915->drm.struct_mutex);
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(start >= end);
	GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	if (unlikely(range_overflows(start, size, end)))
		return -ENOSPC;

	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
		return -ENOSPC;

	mode = DRM_MM_INSERT_BEST;
	if (flags & PIN_HIGH)
		mode = DRM_MM_INSERT_HIGH;
	if (flags & PIN_MAPPABLE)
		mode = DRM_MM_INSERT_LOW;

	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
	 * so we know that we always have a minimum alignment of 4096.
	 * The drm_mm range manager is optimised to return results
	 * with zero alignment, so where possible use the optimal
	 * path.
	 */
	BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
	if (alignment <= I915_GTT_MIN_ALIGNMENT)
		alignment = 0;

	err = drm_mm_insert_node_in_range(&vm->mm, node,
					  size, alignment, color,
					  start, end, mode);
	if (err != -ENOSPC)
		return err;

	if (flags & PIN_NOEVICT)
		return -ENOSPC;

	/* No free space, pick a slot at random.
	 *
	 * There is a pathological case here using a GTT shared between
	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
	 *
	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
	 *         (64k objects)            (448k objects)
	 *
	 * Now imagine that the eviction LRU is ordered top-down (just because
	 * pathology meets real life), and that we need to evict an object to
	 * make room inside the aperture. The eviction scan then has to walk
	 * the 448k list before it finds one within range. And now imagine that
	 * it has to search for a new hole between every byte inside the memcpy,
	 * for several simultaneous clients.
	 *
	 * On a full-ppgtt system, if we have run out of available space, there
	 * will be lots and lots of objects in the eviction list! Again,
	 * searching that LRU list may be slow if we are also applying any
	 * range restrictions (e.g. restriction to low 4GiB) and so, for
	 * simplicity and similarity between different GTTs, try the single
	 * random replacement first.
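	 *
	 * As a rough worked example (illustrative numbers only): with
	 * start = 0, end = 256 MiB, size = 64 KiB and the minimum 4 KiB
	 * alignment, random_offset() above returns an approximately
	 * uniformly distributed, 4 KiB aligned offset in the range
	 * [0, 256 MiB - 64 KiB]. Only if reserving that single offset
	 * fails (e.g. because the occupant is pinned) do we fall back to
	 * the full eviction scan below.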
	 */
	offset = random_offset(start, end,
			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
	if (err != -ENOSPC)
		return err;

	/* Randomly selected placement is pinned, do a search */
	err = i915_gem_evict_something(vm, size, alignment, color,
				       start, end, flags);
	if (err)
		return err;

	return drm_mm_insert_node_in_range(&vm->mm, node,
					   size, alignment, color,
					   start, end, DRM_MM_INSERT_EVICT);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#include "selftests/i915_gem_gtt.c"
#endif