1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <linux/stop_machine.h> 28 #include <drm/drmP.h> 29 #include <drm/i915_drm.h> 30 #include "i915_drv.h" 31 #include "i915_vgpu.h" 32 #include "i915_trace.h" 33 #include "intel_drv.h" 34 #include "intel_frontbuffer.h" 35 36 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) 37 38 /** 39 * DOC: Global GTT views 40 * 41 * Background and previous state 42 * 43 * Historically objects could exist (be bound) in global GTT space only as 44 * singular instances with a view representing all of the object's backing pages 45 * in a linear fashion. This view is called the normal view. 46 * 47 * To support multiple views of the same object, where the number of mapped 48 * pages is not equal to the backing store, or where the layout of the pages 49 * is not linear, the concept of a GGTT view was added. 50 * 51 * One example of an alternative view is a stereo display driven by a single 52 * image. In this case we would have a framebuffer looking like this 53 * (2x2 pages): 54 * 55 * 12 56 * 34 57 * 58 * The above would represent a normal GGTT view as normally mapped for GPU or CPU 59 * rendering. In contrast, fed to the display engine would be an alternative 60 * view which could look something like this: 61 * 62 * 1212 63 * 3434 64 * 65 * In this example both the size and layout of pages in the alternative view are 66 * different from the normal view. 67 * 68 * Implementation and usage 69 * 70 * GGTT views are implemented using VMAs and are distinguished via enum 71 * i915_ggtt_view_type and struct i915_ggtt_view. 72 * 73 * A new flavour of core GEM functions which work with GGTT bound objects was 74 * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid 75 * renaming in large amounts of code. They take the struct i915_ggtt_view 76 * parameter encapsulating all metadata required to implement a view. 77 * 78 * As a helper for callers which are only interested in the normal view, a 79 * globally const i915_ggtt_view_normal singleton instance exists. All old core 80 * GEM API functions, the ones not taking the view parameter, operate on, 81 * or with, the normal GGTT view. 82 * 83 * Code wanting to add or use a new GGTT view needs to: 84 * 85 * 1. Add a new enum with a suitable name. 86 * 2.
Extend the metadata in the i915_ggtt_view structure if required. 87 * 3. Add support to i915_get_vma_pages(). 88 * 89 * New views are required to build a scatter-gather table from within the 90 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and 91 * exists for the lifetime of an VMA. 92 * 93 * Core API is designed to have copy semantics which means that passed in 94 * struct i915_ggtt_view does not need to be persistent (left around after 95 * calling the core API functions). 96 * 97 */ 98 99 static int 100 i915_get_ggtt_vma_pages(struct i915_vma *vma); 101 102 const struct i915_ggtt_view i915_ggtt_view_normal = { 103 .type = I915_GGTT_VIEW_NORMAL, 104 }; 105 const struct i915_ggtt_view i915_ggtt_view_rotated = { 106 .type = I915_GGTT_VIEW_ROTATED, 107 }; 108 109 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, 110 int enable_ppgtt) 111 { 112 bool has_aliasing_ppgtt; 113 bool has_full_ppgtt; 114 bool has_full_48bit_ppgtt; 115 116 has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6; 117 has_full_ppgtt = INTEL_GEN(dev_priv) >= 7; 118 has_full_48bit_ppgtt = 119 IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; 120 121 if (intel_vgpu_active(dev_priv)) { 122 /* emulation is too hard */ 123 has_full_ppgtt = false; 124 has_full_48bit_ppgtt = false; 125 } 126 127 if (!has_aliasing_ppgtt) 128 return 0; 129 130 /* 131 * We don't allow disabling PPGTT for gen9+ as it's a requirement for 132 * execlists, the sole mechanism available to submit work. 133 */ 134 if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) 135 return 0; 136 137 if (enable_ppgtt == 1) 138 return 1; 139 140 if (enable_ppgtt == 2 && has_full_ppgtt) 141 return 2; 142 143 if (enable_ppgtt == 3 && has_full_48bit_ppgtt) 144 return 3; 145 146 #ifdef CONFIG_INTEL_IOMMU 147 /* Disable ppgtt on SNB if VT-d is on. */ 148 if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) { 149 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 150 return 0; 151 } 152 #endif 153 154 /* Early VLV doesn't have this */ 155 if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) { 156 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 157 return 0; 158 } 159 160 if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) 161 return has_full_48bit_ppgtt ? 3 : 2; 162 else 163 return has_aliasing_ppgtt ? 
1 : 0; 164 } 165 166 static int ppgtt_bind_vma(struct i915_vma *vma, 167 enum i915_cache_level cache_level, 168 u32 unused) 169 { 170 u32 pte_flags = 0; 171 172 vma->pages = vma->obj->mm.pages; 173 174 /* Currently applicable only to VLV */ 175 if (vma->obj->gt_ro) 176 pte_flags |= PTE_READ_ONLY; 177 178 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 179 cache_level, pte_flags); 180 181 return 0; 182 } 183 184 static void ppgtt_unbind_vma(struct i915_vma *vma) 185 { 186 vma->vm->clear_range(vma->vm, 187 vma->node.start, 188 vma->size); 189 } 190 191 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 192 enum i915_cache_level level) 193 { 194 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 195 pte |= addr; 196 197 switch (level) { 198 case I915_CACHE_NONE: 199 pte |= PPAT_UNCACHED_INDEX; 200 break; 201 case I915_CACHE_WT: 202 pte |= PPAT_DISPLAY_ELLC_INDEX; 203 break; 204 default: 205 pte |= PPAT_CACHED_INDEX; 206 break; 207 } 208 209 return pte; 210 } 211 212 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 213 const enum i915_cache_level level) 214 { 215 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 216 pde |= addr; 217 if (level != I915_CACHE_NONE) 218 pde |= PPAT_CACHED_PDE_INDEX; 219 else 220 pde |= PPAT_UNCACHED_INDEX; 221 return pde; 222 } 223 224 #define gen8_pdpe_encode gen8_pde_encode 225 #define gen8_pml4e_encode gen8_pde_encode 226 227 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 228 enum i915_cache_level level, 229 u32 unused) 230 { 231 gen6_pte_t pte = GEN6_PTE_VALID; 232 pte |= GEN6_PTE_ADDR_ENCODE(addr); 233 234 switch (level) { 235 case I915_CACHE_L3_LLC: 236 case I915_CACHE_LLC: 237 pte |= GEN6_PTE_CACHE_LLC; 238 break; 239 case I915_CACHE_NONE: 240 pte |= GEN6_PTE_UNCACHED; 241 break; 242 default: 243 MISSING_CASE(level); 244 } 245 246 return pte; 247 } 248 249 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 250 enum i915_cache_level level, 251 u32 unused) 252 { 253 gen6_pte_t pte = GEN6_PTE_VALID; 254 pte |= GEN6_PTE_ADDR_ENCODE(addr); 255 256 switch (level) { 257 case I915_CACHE_L3_LLC: 258 pte |= GEN7_PTE_CACHE_L3_LLC; 259 break; 260 case I915_CACHE_LLC: 261 pte |= GEN6_PTE_CACHE_LLC; 262 break; 263 case I915_CACHE_NONE: 264 pte |= GEN6_PTE_UNCACHED; 265 break; 266 default: 267 MISSING_CASE(level); 268 } 269 270 return pte; 271 } 272 273 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 274 enum i915_cache_level level, 275 u32 flags) 276 { 277 gen6_pte_t pte = GEN6_PTE_VALID; 278 pte |= GEN6_PTE_ADDR_ENCODE(addr); 279 280 if (!(flags & PTE_READ_ONLY)) 281 pte |= BYT_PTE_WRITEABLE; 282 283 if (level != I915_CACHE_NONE) 284 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 285 286 return pte; 287 } 288 289 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 290 enum i915_cache_level level, 291 u32 unused) 292 { 293 gen6_pte_t pte = GEN6_PTE_VALID; 294 pte |= HSW_PTE_ADDR_ENCODE(addr); 295 296 if (level != I915_CACHE_NONE) 297 pte |= HSW_WB_LLC_AGE3; 298 299 return pte; 300 } 301 302 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 303 enum i915_cache_level level, 304 u32 unused) 305 { 306 gen6_pte_t pte = GEN6_PTE_VALID; 307 pte |= HSW_PTE_ADDR_ENCODE(addr); 308 309 switch (level) { 310 case I915_CACHE_NONE: 311 break; 312 case I915_CACHE_WT: 313 pte |= HSW_WT_ELLC_LLC_AGE3; 314 break; 315 default: 316 pte |= HSW_WB_ELLC_LLC_AGE3; 317 break; 318 } 319 320 return pte; 321 } 322 323 static int __setup_page_dma(struct drm_i915_private *dev_priv, 324 struct i915_page_dma *p, gfp_t flags) 325 { 326 struct device *kdev = &dev_priv->drm.pdev->dev; 327 328 p->page = 
alloc_page(flags); 329 if (!p->page) 330 return -ENOMEM; 331 332 p->daddr = dma_map_page(kdev, 333 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 334 335 if (dma_mapping_error(kdev, p->daddr)) { 336 __free_page(p->page); 337 return -EINVAL; 338 } 339 340 return 0; 341 } 342 343 static int setup_page_dma(struct drm_i915_private *dev_priv, 344 struct i915_page_dma *p) 345 { 346 return __setup_page_dma(dev_priv, p, I915_GFP_DMA); 347 } 348 349 static void cleanup_page_dma(struct drm_i915_private *dev_priv, 350 struct i915_page_dma *p) 351 { 352 struct pci_dev *pdev = dev_priv->drm.pdev; 353 354 if (WARN_ON(!p->page)) 355 return; 356 357 dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 358 __free_page(p->page); 359 memset(p, 0, sizeof(*p)); 360 } 361 362 static void *kmap_page_dma(struct i915_page_dma *p) 363 { 364 return kmap_atomic(p->page); 365 } 366 367 /* We use the flushing unmap only with ppgtt structures: 368 * page directories, page tables and scratch pages. 369 */ 370 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) 371 { 372 /* There are only few exceptions for gen >=6. chv and bxt. 373 * And we are not sure about the latter so play safe for now. 374 */ 375 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 376 drm_clflush_virt_range(vaddr, PAGE_SIZE); 377 378 kunmap_atomic(vaddr); 379 } 380 381 #define kmap_px(px) kmap_page_dma(px_base(px)) 382 #define kunmap_px(ppgtt, vaddr) \ 383 kunmap_page_dma(to_i915((ppgtt)->base.dev), (vaddr)) 384 385 #define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) 386 #define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) 387 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) 388 #define fill32_px(dev_priv, px, v) \ 389 fill_page_dma_32((dev_priv), px_base(px), (v)) 390 391 static void fill_page_dma(struct drm_i915_private *dev_priv, 392 struct i915_page_dma *p, const uint64_t val) 393 { 394 int i; 395 uint64_t * const vaddr = kmap_page_dma(p); 396 397 for (i = 0; i < 512; i++) 398 vaddr[i] = val; 399 400 kunmap_page_dma(dev_priv, vaddr); 401 } 402 403 static void fill_page_dma_32(struct drm_i915_private *dev_priv, 404 struct i915_page_dma *p, const uint32_t val32) 405 { 406 uint64_t v = val32; 407 408 v = v << 32 | val32; 409 410 fill_page_dma(dev_priv, p, v); 411 } 412 413 static int 414 setup_scratch_page(struct drm_i915_private *dev_priv, 415 struct i915_page_dma *scratch, 416 gfp_t gfp) 417 { 418 return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); 419 } 420 421 static void cleanup_scratch_page(struct drm_i915_private *dev_priv, 422 struct i915_page_dma *scratch) 423 { 424 cleanup_page_dma(dev_priv, scratch); 425 } 426 427 static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) 428 { 429 struct i915_page_table *pt; 430 const size_t count = INTEL_GEN(dev_priv) >= 8 ? 
GEN8_PTES : GEN6_PTES; 431 int ret = -ENOMEM; 432 433 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 434 if (!pt) 435 return ERR_PTR(-ENOMEM); 436 437 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 438 GFP_KERNEL); 439 440 if (!pt->used_ptes) 441 goto fail_bitmap; 442 443 ret = setup_px(dev_priv, pt); 444 if (ret) 445 goto fail_page_m; 446 447 return pt; 448 449 fail_page_m: 450 kfree(pt->used_ptes); 451 fail_bitmap: 452 kfree(pt); 453 454 return ERR_PTR(ret); 455 } 456 457 static void free_pt(struct drm_i915_private *dev_priv, 458 struct i915_page_table *pt) 459 { 460 cleanup_px(dev_priv, pt); 461 kfree(pt->used_ptes); 462 kfree(pt); 463 } 464 465 static void gen8_initialize_pt(struct i915_address_space *vm, 466 struct i915_page_table *pt) 467 { 468 gen8_pte_t scratch_pte; 469 470 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 471 I915_CACHE_LLC); 472 473 fill_px(to_i915(vm->dev), pt, scratch_pte); 474 } 475 476 static void gen6_initialize_pt(struct i915_address_space *vm, 477 struct i915_page_table *pt) 478 { 479 gen6_pte_t scratch_pte; 480 481 WARN_ON(vm->scratch_page.daddr == 0); 482 483 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 484 I915_CACHE_LLC, 0); 485 486 fill32_px(to_i915(vm->dev), pt, scratch_pte); 487 } 488 489 static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) 490 { 491 struct i915_page_directory *pd; 492 int ret = -ENOMEM; 493 494 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 495 if (!pd) 496 return ERR_PTR(-ENOMEM); 497 498 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 499 sizeof(*pd->used_pdes), GFP_KERNEL); 500 if (!pd->used_pdes) 501 goto fail_bitmap; 502 503 ret = setup_px(dev_priv, pd); 504 if (ret) 505 goto fail_page_m; 506 507 return pd; 508 509 fail_page_m: 510 kfree(pd->used_pdes); 511 fail_bitmap: 512 kfree(pd); 513 514 return ERR_PTR(ret); 515 } 516 517 static void free_pd(struct drm_i915_private *dev_priv, 518 struct i915_page_directory *pd) 519 { 520 if (px_page(pd)) { 521 cleanup_px(dev_priv, pd); 522 kfree(pd->used_pdes); 523 kfree(pd); 524 } 525 } 526 527 static void gen8_initialize_pd(struct i915_address_space *vm, 528 struct i915_page_directory *pd) 529 { 530 gen8_pde_t scratch_pde; 531 532 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 533 534 fill_px(to_i915(vm->dev), pd, scratch_pde); 535 } 536 537 static int __pdp_init(struct drm_i915_private *dev_priv, 538 struct i915_page_directory_pointer *pdp) 539 { 540 size_t pdpes = I915_PDPES_PER_PDP(dev_priv); 541 542 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 543 sizeof(unsigned long), 544 GFP_KERNEL); 545 if (!pdp->used_pdpes) 546 return -ENOMEM; 547 548 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 549 GFP_KERNEL); 550 if (!pdp->page_directory) { 551 kfree(pdp->used_pdpes); 552 /* the PDP might be the statically allocated top level. 
Keep it 553 * as clean as possible */ 554 pdp->used_pdpes = NULL; 555 return -ENOMEM; 556 } 557 558 return 0; 559 } 560 561 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 562 { 563 kfree(pdp->used_pdpes); 564 kfree(pdp->page_directory); 565 pdp->page_directory = NULL; 566 } 567 568 static struct 569 i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) 570 { 571 struct i915_page_directory_pointer *pdp; 572 int ret = -ENOMEM; 573 574 WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); 575 576 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 577 if (!pdp) 578 return ERR_PTR(-ENOMEM); 579 580 ret = __pdp_init(dev_priv, pdp); 581 if (ret) 582 goto fail_bitmap; 583 584 ret = setup_px(dev_priv, pdp); 585 if (ret) 586 goto fail_page_m; 587 588 return pdp; 589 590 fail_page_m: 591 __pdp_fini(pdp); 592 fail_bitmap: 593 kfree(pdp); 594 595 return ERR_PTR(ret); 596 } 597 598 static void free_pdp(struct drm_i915_private *dev_priv, 599 struct i915_page_directory_pointer *pdp) 600 { 601 __pdp_fini(pdp); 602 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 603 cleanup_px(dev_priv, pdp); 604 kfree(pdp); 605 } 606 } 607 608 static void gen8_initialize_pdp(struct i915_address_space *vm, 609 struct i915_page_directory_pointer *pdp) 610 { 611 gen8_ppgtt_pdpe_t scratch_pdpe; 612 613 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 614 615 fill_px(to_i915(vm->dev), pdp, scratch_pdpe); 616 } 617 618 static void gen8_initialize_pml4(struct i915_address_space *vm, 619 struct i915_pml4 *pml4) 620 { 621 gen8_ppgtt_pml4e_t scratch_pml4e; 622 623 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 624 I915_CACHE_LLC); 625 626 fill_px(to_i915(vm->dev), pml4, scratch_pml4e); 627 } 628 629 static void 630 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 631 struct i915_page_directory_pointer *pdp, 632 struct i915_page_directory *pd, 633 int index) 634 { 635 gen8_ppgtt_pdpe_t *page_directorypo; 636 637 if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) 638 return; 639 640 page_directorypo = kmap_px(pdp); 641 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 642 kunmap_px(ppgtt, page_directorypo); 643 } 644 645 static void 646 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 647 struct i915_pml4 *pml4, 648 struct i915_page_directory_pointer *pdp, 649 int index) 650 { 651 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 652 653 WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); 654 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 655 kunmap_px(ppgtt, pagemap); 656 } 657 658 /* Broadwell Page Directory Pointer Descriptors */ 659 static int gen8_write_pdp(struct drm_i915_gem_request *req, 660 unsigned entry, 661 dma_addr_t addr) 662 { 663 struct intel_ring *ring = req->ring; 664 struct intel_engine_cs *engine = req->engine; 665 int ret; 666 667 BUG_ON(entry >= 4); 668 669 ret = intel_ring_begin(req, 6); 670 if (ret) 671 return ret; 672 673 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 674 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 675 intel_ring_emit(ring, upper_32_bits(addr)); 676 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 677 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 678 intel_ring_emit(ring, lower_32_bits(addr)); 679 intel_ring_advance(ring); 680 681 return 0; 682 } 683 684 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 685 struct drm_i915_gem_request *req) 686 { 687 int i, ret; 688 689 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 690 const dma_addr_t 
pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 691 692 ret = gen8_write_pdp(req, i, pd_daddr); 693 if (ret) 694 return ret; 695 } 696 697 return 0; 698 } 699 700 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 701 struct drm_i915_gem_request *req) 702 { 703 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 704 } 705 706 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 707 * the page table structures, we mark them dirty so that 708 * context switching/execlist queuing code takes extra steps 709 * to ensure that tlbs are flushed. 710 */ 711 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 712 { 713 ppgtt->pd_dirty_rings = INTEL_INFO(to_i915(ppgtt->base.dev))->ring_mask; 714 } 715 716 /* Removes entries from a single page table, releasing it if it's empty. 717 * Caller can use the return value to update higher-level entries. 718 */ 719 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, 720 struct i915_page_table *pt, 721 uint64_t start, 722 uint64_t length) 723 { 724 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 725 unsigned int num_entries = gen8_pte_count(start, length); 726 unsigned int pte = gen8_pte_index(start); 727 unsigned int pte_end = pte + num_entries; 728 gen8_pte_t *pt_vaddr; 729 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 730 I915_CACHE_LLC); 731 732 if (WARN_ON(!px_page(pt))) 733 return false; 734 735 GEM_BUG_ON(pte_end > GEN8_PTES); 736 737 bitmap_clear(pt->used_ptes, pte, num_entries); 738 739 if (bitmap_empty(pt->used_ptes, GEN8_PTES)) { 740 free_pt(to_i915(vm->dev), pt); 741 return true; 742 } 743 744 pt_vaddr = kmap_px(pt); 745 746 while (pte < pte_end) 747 pt_vaddr[pte++] = scratch_pte; 748 749 kunmap_px(ppgtt, pt_vaddr); 750 751 return false; 752 } 753 754 /* Removes entries from a single page dir, releasing it if it's empty. 755 * Caller can use the return value to update higher-level entries 756 */ 757 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, 758 struct i915_page_directory *pd, 759 uint64_t start, 760 uint64_t length) 761 { 762 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 763 struct i915_page_table *pt; 764 uint64_t pde; 765 gen8_pde_t *pde_vaddr; 766 gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), 767 I915_CACHE_LLC); 768 769 gen8_for_each_pde(pt, pd, start, length, pde) { 770 if (WARN_ON(!pd->page_table[pde])) 771 break; 772 773 if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { 774 __clear_bit(pde, pd->used_pdes); 775 pde_vaddr = kmap_px(pd); 776 pde_vaddr[pde] = scratch_pde; 777 kunmap_px(ppgtt, pde_vaddr); 778 } 779 } 780 781 if (bitmap_empty(pd->used_pdes, I915_PDES)) { 782 free_pd(to_i915(vm->dev), pd); 783 return true; 784 } 785 786 return false; 787 } 788 789 /* Removes entries from a single page dir pointer, releasing it if it's empty. 
790 * Caller can use the return value to update higher-level entries 791 */ 792 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, 793 struct i915_page_directory_pointer *pdp, 794 uint64_t start, 795 uint64_t length) 796 { 797 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 798 struct drm_i915_private *dev_priv = to_i915(vm->dev); 799 struct i915_page_directory *pd; 800 uint64_t pdpe; 801 gen8_ppgtt_pdpe_t *pdpe_vaddr; 802 gen8_ppgtt_pdpe_t scratch_pdpe = 803 gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 804 805 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 806 if (WARN_ON(!pdp->page_directory[pdpe])) 807 break; 808 809 if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { 810 __clear_bit(pdpe, pdp->used_pdpes); 811 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 812 pdpe_vaddr = kmap_px(pdp); 813 pdpe_vaddr[pdpe] = scratch_pdpe; 814 kunmap_px(ppgtt, pdpe_vaddr); 815 } 816 } 817 } 818 819 mark_tlbs_dirty(ppgtt); 820 821 if (USES_FULL_48BIT_PPGTT(dev_priv) && 822 bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) { 823 free_pdp(dev_priv, pdp); 824 return true; 825 } 826 827 return false; 828 } 829 830 /* Removes entries from a single pml4. 831 * This is the top-level structure in 4-level page tables used on gen8+. 832 * Empty entries are always scratch pml4e. 833 */ 834 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, 835 struct i915_pml4 *pml4, 836 uint64_t start, 837 uint64_t length) 838 { 839 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 840 struct i915_page_directory_pointer *pdp; 841 uint64_t pml4e; 842 gen8_ppgtt_pml4e_t *pml4e_vaddr; 843 gen8_ppgtt_pml4e_t scratch_pml4e = 844 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); 845 846 GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))); 847 848 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 849 if (WARN_ON(!pml4->pdps[pml4e])) 850 break; 851 852 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { 853 __clear_bit(pml4e, pml4->used_pml4es); 854 pml4e_vaddr = kmap_px(pml4); 855 pml4e_vaddr[pml4e] = scratch_pml4e; 856 kunmap_px(ppgtt, pml4e_vaddr); 857 } 858 } 859 } 860 861 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 862 uint64_t start, uint64_t length) 863 { 864 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 865 866 if (USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) 867 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); 868 else 869 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); 870 } 871 872 static void 873 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 874 struct i915_page_directory_pointer *pdp, 875 struct sg_page_iter *sg_iter, 876 uint64_t start, 877 enum i915_cache_level cache_level) 878 { 879 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 880 gen8_pte_t *pt_vaddr; 881 unsigned pdpe = gen8_pdpe_index(start); 882 unsigned pde = gen8_pde_index(start); 883 unsigned pte = gen8_pte_index(start); 884 885 pt_vaddr = NULL; 886 887 while (__sg_page_iter_next(sg_iter)) { 888 if (pt_vaddr == NULL) { 889 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 890 struct i915_page_table *pt = pd->page_table[pde]; 891 pt_vaddr = kmap_px(pt); 892 } 893 894 pt_vaddr[pte] = 895 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 896 cache_level); 897 if (++pte == GEN8_PTES) { 898 kunmap_px(ppgtt, pt_vaddr); 899 pt_vaddr = NULL; 900 if (++pde == I915_PDES) { 901 if (++pdpe == I915_PDPES_PER_PDP(to_i915(vm->dev))) 902 break; 903 pde = 0; 904 } 905 pte = 0; 906 } 907 } 908 909 if (pt_vaddr) 910 kunmap_px(ppgtt, pt_vaddr); 911 } 
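/*
 * For reference, and assuming the usual gen8 layout of 4 KiB pages with
 * 512 entries per paging level (which is what the GEN8_*_SHIFT values used
 * by the trace calls and index helpers here encode), a PPGTT address
 * decomposes as:
 *
 *   bits 47..39  PML4E index (48b mode only)
 *   bits 38..30  PDPE index
 *   bits 29..21  PDE index
 *   bits 20..12  PTE index
 *   bits 11..0   byte offset within the page
 *
 * Legacy 32b mode simply drops the PML4 level and uses at most
 * GEN8_LEGACY_PDPES page directory pointers.
 */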
912 913 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 914 struct sg_table *pages, 915 uint64_t start, 916 enum i915_cache_level cache_level, 917 u32 unused) 918 { 919 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 920 struct sg_page_iter sg_iter; 921 922 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 923 924 if (!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) { 925 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 926 cache_level); 927 } else { 928 struct i915_page_directory_pointer *pdp; 929 uint64_t pml4e; 930 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 931 932 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 933 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 934 start, cache_level); 935 } 936 } 937 } 938 939 static void gen8_free_page_tables(struct drm_i915_private *dev_priv, 940 struct i915_page_directory *pd) 941 { 942 int i; 943 944 if (!px_page(pd)) 945 return; 946 947 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 948 if (WARN_ON(!pd->page_table[i])) 949 continue; 950 951 free_pt(dev_priv, pd->page_table[i]); 952 pd->page_table[i] = NULL; 953 } 954 } 955 956 static int gen8_init_scratch(struct i915_address_space *vm) 957 { 958 struct drm_i915_private *dev_priv = to_i915(vm->dev); 959 int ret; 960 961 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 962 if (ret) 963 return ret; 964 965 vm->scratch_pt = alloc_pt(dev_priv); 966 if (IS_ERR(vm->scratch_pt)) { 967 ret = PTR_ERR(vm->scratch_pt); 968 goto free_scratch_page; 969 } 970 971 vm->scratch_pd = alloc_pd(dev_priv); 972 if (IS_ERR(vm->scratch_pd)) { 973 ret = PTR_ERR(vm->scratch_pd); 974 goto free_pt; 975 } 976 977 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 978 vm->scratch_pdp = alloc_pdp(dev_priv); 979 if (IS_ERR(vm->scratch_pdp)) { 980 ret = PTR_ERR(vm->scratch_pdp); 981 goto free_pd; 982 } 983 } 984 985 gen8_initialize_pt(vm, vm->scratch_pt); 986 gen8_initialize_pd(vm, vm->scratch_pd); 987 if (USES_FULL_48BIT_PPGTT(dev_priv)) 988 gen8_initialize_pdp(vm, vm->scratch_pdp); 989 990 return 0; 991 992 free_pd: 993 free_pd(dev_priv, vm->scratch_pd); 994 free_pt: 995 free_pt(dev_priv, vm->scratch_pt); 996 free_scratch_page: 997 cleanup_scratch_page(dev_priv, &vm->scratch_page); 998 999 return ret; 1000 } 1001 1002 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 1003 { 1004 enum vgt_g2v_type msg; 1005 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1006 int i; 1007 1008 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1009 u64 daddr = px_dma(&ppgtt->pml4); 1010 1011 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1012 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1013 1014 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1015 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1016 } else { 1017 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 1018 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1019 1020 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1021 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1022 } 1023 1024 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1025 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1026 } 1027 1028 I915_WRITE(vgtif_reg(g2v_notify), msg); 1029 1030 return 0; 1031 } 1032 1033 static void gen8_free_scratch(struct i915_address_space *vm) 1034 { 1035 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1036 1037 if (USES_FULL_48BIT_PPGTT(dev_priv)) 1038 free_pdp(dev_priv, vm->scratch_pdp); 1039 free_pd(dev_priv, vm->scratch_pd); 1040 free_pt(dev_priv, vm->scratch_pt); 1041 cleanup_scratch_page(dev_priv, &vm->scratch_page); 1042 } 1043 1044 static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, 1045 struct i915_page_directory_pointer *pdp) 1046 { 1047 int i; 1048 1049 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { 1050 if (WARN_ON(!pdp->page_directory[i])) 1051 continue; 1052 1053 gen8_free_page_tables(dev_priv, pdp->page_directory[i]); 1054 free_pd(dev_priv, pdp->page_directory[i]); 1055 } 1056 1057 free_pdp(dev_priv, pdp); 1058 } 1059 1060 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1061 { 1062 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1063 int i; 1064 1065 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 1066 if (WARN_ON(!ppgtt->pml4.pdps[i])) 1067 continue; 1068 1069 gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); 1070 } 1071 1072 cleanup_px(dev_priv, &ppgtt->pml4); 1073 } 1074 1075 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1076 { 1077 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1078 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1079 1080 if (intel_vgpu_active(dev_priv)) 1081 gen8_ppgtt_notify_vgt(ppgtt, false); 1082 1083 if (!USES_FULL_48BIT_PPGTT(dev_priv)) 1084 gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); 1085 else 1086 gen8_ppgtt_cleanup_4lvl(ppgtt); 1087 1088 gen8_free_scratch(vm); 1089 } 1090 1091 /** 1092 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1093 * @vm: Master vm structure. 1094 * @pd: Page directory for this address range. 1095 * @start: Starting virtual address to begin allocations. 1096 * @length: Size of the allocations. 1097 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1098 * caller to free on error. 1099 * 1100 * Allocate the required number of page tables. Extremely similar to 1101 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1102 * the page directory boundary (instead of the page directory pointer). That 1103 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1104 * possible, and likely that the caller will need to use multiple calls of this 1105 * function to achieve the appropriate allocation. 1106 * 1107 * Return: 0 if success; negative error code otherwise. 
1108 */ 1109 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, 1110 struct i915_page_directory *pd, 1111 uint64_t start, 1112 uint64_t length, 1113 unsigned long *new_pts) 1114 { 1115 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1116 struct i915_page_table *pt; 1117 uint32_t pde; 1118 1119 gen8_for_each_pde(pt, pd, start, length, pde) { 1120 /* Don't reallocate page tables */ 1121 if (test_bit(pde, pd->used_pdes)) { 1122 /* Scratch is never allocated this way */ 1123 WARN_ON(pt == vm->scratch_pt); 1124 continue; 1125 } 1126 1127 pt = alloc_pt(dev_priv); 1128 if (IS_ERR(pt)) 1129 goto unwind_out; 1130 1131 gen8_initialize_pt(vm, pt); 1132 pd->page_table[pde] = pt; 1133 __set_bit(pde, new_pts); 1134 trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); 1135 } 1136 1137 return 0; 1138 1139 unwind_out: 1140 for_each_set_bit(pde, new_pts, I915_PDES) 1141 free_pt(dev_priv, pd->page_table[pde]); 1142 1143 return -ENOMEM; 1144 } 1145 1146 /** 1147 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. 1148 * @vm: Master vm structure. 1149 * @pdp: Page directory pointer for this address range. 1150 * @start: Starting virtual address to begin allocations. 1151 * @length: Size of the allocations. 1152 * @new_pds: Bitmap set by function with new allocations. Likely used by the 1153 * caller to free on error. 1154 * 1155 * Allocate the required number of page directories starting at the pde index of 1156 * @start, and ending at the pde index @start + @length. This function will skip 1157 * over already allocated page directories within the range, and only allocate 1158 * new ones, setting the appropriate pointer within the pdp as well as the 1159 * correct position in the bitmap @new_pds. 1160 * 1161 * The function will only allocate the pages within the range for a give page 1162 * directory pointer. In other words, if @start + @length straddles a virtually 1163 * addressed PDP boundary (512GB for 4k pages), there will be more allocations 1164 * required by the caller, This is not currently possible, and the BUG in the 1165 * code will prevent it. 1166 * 1167 * Return: 0 if success; negative error code otherwise. 1168 */ 1169 static int 1170 gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, 1171 struct i915_page_directory_pointer *pdp, 1172 uint64_t start, 1173 uint64_t length, 1174 unsigned long *new_pds) 1175 { 1176 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1177 struct i915_page_directory *pd; 1178 uint32_t pdpe; 1179 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1180 1181 WARN_ON(!bitmap_empty(new_pds, pdpes)); 1182 1183 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1184 if (test_bit(pdpe, pdp->used_pdpes)) 1185 continue; 1186 1187 pd = alloc_pd(dev_priv); 1188 if (IS_ERR(pd)) 1189 goto unwind_out; 1190 1191 gen8_initialize_pd(vm, pd); 1192 pdp->page_directory[pdpe] = pd; 1193 __set_bit(pdpe, new_pds); 1194 trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); 1195 } 1196 1197 return 0; 1198 1199 unwind_out: 1200 for_each_set_bit(pdpe, new_pds, pdpes) 1201 free_pd(dev_priv, pdp->page_directory[pdpe]); 1202 1203 return -ENOMEM; 1204 } 1205 1206 /** 1207 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. 1208 * @vm: Master vm structure. 1209 * @pml4: Page map level 4 for this address range. 1210 * @start: Starting virtual address to begin allocations. 1211 * @length: Size of the allocations. 1212 * @new_pdps: Bitmap set by function with new allocations. 
Likely used by the 1213 * caller to free on error. 1214 * 1215 * Allocate the required number of page directory pointers. Extremely similar to 1216 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1217 * The main difference is here we are limited by the pml4 boundary (instead of 1218 * the page directory pointer). 1219 * 1220 * Return: 0 if success; negative error code otherwise. 1221 */ 1222 static int 1223 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1224 struct i915_pml4 *pml4, 1225 uint64_t start, 1226 uint64_t length, 1227 unsigned long *new_pdps) 1228 { 1229 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1230 struct i915_page_directory_pointer *pdp; 1231 uint32_t pml4e; 1232 1233 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1234 1235 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1236 if (!test_bit(pml4e, pml4->used_pml4es)) { 1237 pdp = alloc_pdp(dev_priv); 1238 if (IS_ERR(pdp)) 1239 goto unwind_out; 1240 1241 gen8_initialize_pdp(vm, pdp); 1242 pml4->pdps[pml4e] = pdp; 1243 __set_bit(pml4e, new_pdps); 1244 trace_i915_page_directory_pointer_entry_alloc(vm, 1245 pml4e, 1246 start, 1247 GEN8_PML4E_SHIFT); 1248 } 1249 } 1250 1251 return 0; 1252 1253 unwind_out: 1254 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1255 free_pdp(dev_priv, pml4->pdps[pml4e]); 1256 1257 return -ENOMEM; 1258 } 1259 1260 static void 1261 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1262 { 1263 kfree(new_pts); 1264 kfree(new_pds); 1265 } 1266 1267 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1268 * of these are based on the number of PDPEs in the system. 1269 */ 1270 static 1271 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1272 unsigned long **new_pts, 1273 uint32_t pdpes) 1274 { 1275 unsigned long *pds; 1276 unsigned long *pts; 1277 1278 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1279 if (!pds) 1280 return -ENOMEM; 1281 1282 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1283 GFP_TEMPORARY); 1284 if (!pts) 1285 goto err_out; 1286 1287 *new_pds = pds; 1288 *new_pts = pts; 1289 1290 return 0; 1291 1292 err_out: 1293 free_gen8_temp_bitmaps(pds, pts); 1294 return -ENOMEM; 1295 } 1296 1297 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1298 struct i915_page_directory_pointer *pdp, 1299 uint64_t start, 1300 uint64_t length) 1301 { 1302 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1303 unsigned long *new_page_dirs, *new_page_tables; 1304 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1305 struct i915_page_directory *pd; 1306 const uint64_t orig_start = start; 1307 const uint64_t orig_length = length; 1308 uint32_t pdpe; 1309 uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); 1310 int ret; 1311 1312 /* Wrap is never okay since we can only represent 48b, and we don't 1313 * actually use the other side of the canonical address space. 
1314 */ 1315 if (WARN_ON(start + length < start)) 1316 return -ENODEV; 1317 1318 if (WARN_ON(start + length > vm->total)) 1319 return -ENODEV; 1320 1321 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1322 if (ret) 1323 return ret; 1324 1325 /* Do the allocations first so we can easily bail out */ 1326 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1327 new_page_dirs); 1328 if (ret) { 1329 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1330 return ret; 1331 } 1332 1333 /* For every page directory referenced, allocate page tables */ 1334 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1335 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1336 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1337 if (ret) 1338 goto err_out; 1339 } 1340 1341 start = orig_start; 1342 length = orig_length; 1343 1344 /* Allocations have completed successfully, so set the bitmaps, and do 1345 * the mappings. */ 1346 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1347 gen8_pde_t *const page_directory = kmap_px(pd); 1348 struct i915_page_table *pt; 1349 uint64_t pd_len = length; 1350 uint64_t pd_start = start; 1351 uint32_t pde; 1352 1353 /* Every pd should be allocated, we just did that above. */ 1354 WARN_ON(!pd); 1355 1356 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1357 /* Same reasoning as pd */ 1358 WARN_ON(!pt); 1359 WARN_ON(!pd_len); 1360 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1361 1362 /* Set our used ptes within the page table */ 1363 bitmap_set(pt->used_ptes, 1364 gen8_pte_index(pd_start), 1365 gen8_pte_count(pd_start, pd_len)); 1366 1367 /* Our pde is now pointing to the pagetable, pt */ 1368 __set_bit(pde, pd->used_pdes); 1369 1370 /* Map the PDE to the page table */ 1371 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1372 I915_CACHE_LLC); 1373 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1374 gen8_pte_index(start), 1375 gen8_pte_count(start, length), 1376 GEN8_PTES); 1377 1378 /* NB: We haven't yet mapped ptes to pages. At this 1379 * point we're still relying on insert_entries() */ 1380 } 1381 1382 kunmap_px(ppgtt, page_directory); 1383 __set_bit(pdpe, pdp->used_pdpes); 1384 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1385 } 1386 1387 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1388 mark_tlbs_dirty(ppgtt); 1389 return 0; 1390 1391 err_out: 1392 while (pdpe--) { 1393 unsigned long temp; 1394 1395 for_each_set_bit(temp, new_page_tables + pdpe * 1396 BITS_TO_LONGS(I915_PDES), I915_PDES) 1397 free_pt(dev_priv, 1398 pdp->page_directory[pdpe]->page_table[temp]); 1399 } 1400 1401 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1402 free_pd(dev_priv, pdp->page_directory[pdpe]); 1403 1404 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1405 mark_tlbs_dirty(ppgtt); 1406 return ret; 1407 } 1408 1409 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1410 struct i915_pml4 *pml4, 1411 uint64_t start, 1412 uint64_t length) 1413 { 1414 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1415 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1416 struct i915_page_directory_pointer *pdp; 1417 uint64_t pml4e; 1418 int ret = 0; 1419 1420 /* Do the pml4 allocations first, so we don't need to track the newly 1421 * allocated tables below the pdp */ 1422 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1423 1424 /* The pagedirectory and pagetable allocations are done in the shared 3 1425 * and 4 level code. Just allocate the pdps. 
1426 */ 1427 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1428 new_pdps); 1429 if (ret) 1430 return ret; 1431 1432 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1433 "The allocation has spanned more than 512GB. " 1434 "It is highly likely this is incorrect."); 1435 1436 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1437 WARN_ON(!pdp); 1438 1439 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1440 if (ret) 1441 goto err_out; 1442 1443 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1444 } 1445 1446 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1447 GEN8_PML4ES_PER_PML4); 1448 1449 return 0; 1450 1451 err_out: 1452 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1453 gen8_ppgtt_cleanup_3lvl(to_i915(vm->dev), pml4->pdps[pml4e]); 1454 1455 return ret; 1456 } 1457 1458 static int gen8_alloc_va_range(struct i915_address_space *vm, 1459 uint64_t start, uint64_t length) 1460 { 1461 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1462 1463 if (USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) 1464 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1465 else 1466 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1467 } 1468 1469 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1470 uint64_t start, uint64_t length, 1471 gen8_pte_t scratch_pte, 1472 struct seq_file *m) 1473 { 1474 struct i915_page_directory *pd; 1475 uint32_t pdpe; 1476 1477 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1478 struct i915_page_table *pt; 1479 uint64_t pd_len = length; 1480 uint64_t pd_start = start; 1481 uint32_t pde; 1482 1483 if (!test_bit(pdpe, pdp->used_pdpes)) 1484 continue; 1485 1486 seq_printf(m, "\tPDPE #%d\n", pdpe); 1487 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1488 uint32_t pte; 1489 gen8_pte_t *pt_vaddr; 1490 1491 if (!test_bit(pde, pd->used_pdes)) 1492 continue; 1493 1494 pt_vaddr = kmap_px(pt); 1495 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1496 uint64_t va = 1497 (pdpe << GEN8_PDPE_SHIFT) | 1498 (pde << GEN8_PDE_SHIFT) | 1499 (pte << GEN8_PTE_SHIFT); 1500 int i; 1501 bool found = false; 1502 1503 for (i = 0; i < 4; i++) 1504 if (pt_vaddr[pte + i] != scratch_pte) 1505 found = true; 1506 if (!found) 1507 continue; 1508 1509 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1510 for (i = 0; i < 4; i++) { 1511 if (pt_vaddr[pte + i] != scratch_pte) 1512 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1513 else 1514 seq_puts(m, " SCRATCH "); 1515 } 1516 seq_puts(m, "\n"); 1517 } 1518 /* don't use kunmap_px, it could trigger 1519 * an unnecessary flush. 
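* (kunmap_page_dma() would clflush on CHV/BXT; this dump path only reads the PTEs, so a plain kunmap_atomic() is enough here.)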
1520 */ 1521 kunmap_atomic(pt_vaddr); 1522 } 1523 } 1524 } 1525 1526 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1527 { 1528 struct i915_address_space *vm = &ppgtt->base; 1529 uint64_t start = ppgtt->base.start; 1530 uint64_t length = ppgtt->base.total; 1531 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 1532 I915_CACHE_LLC); 1533 1534 if (!USES_FULL_48BIT_PPGTT(to_i915(vm->dev))) { 1535 gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); 1536 } else { 1537 uint64_t pml4e; 1538 struct i915_pml4 *pml4 = &ppgtt->pml4; 1539 struct i915_page_directory_pointer *pdp; 1540 1541 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1542 if (!test_bit(pml4e, pml4->used_pml4es)) 1543 continue; 1544 1545 seq_printf(m, " PML4E #%llu\n", pml4e); 1546 gen8_dump_pdp(pdp, start, length, scratch_pte, m); 1547 } 1548 } 1549 } 1550 1551 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) 1552 { 1553 unsigned long *new_page_dirs, *new_page_tables; 1554 uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); 1555 int ret; 1556 1557 /* We allocate a temp bitmap for page tables for no gain, 1558 * but as this is for init only, let's keep things simple 1559 */ 1560 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1561 if (ret) 1562 return ret; 1563 1564 /* Allocate for all pdps regardless of how the ppgtt 1565 * was defined. 1566 */ 1567 ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, 1568 0, 1ULL << 32, 1569 new_page_dirs); 1570 if (!ret) 1571 *ppgtt->pdp.used_pdpes = *new_page_dirs; 1572 1573 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1574 1575 return ret; 1576 } 1577 1578 /* 1579 * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP registers 1580 * with a net effect resembling a 2-level page table in normal x86 terms. Each 1581 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of legacy 32b address 1582 * space.
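* For comparison, 48b mode adds a PML4 level on top: each of its 512 entries points to a page directory pointer covering 512GB, which is where the 1ULL << 48 total programmed in gen8_ppgtt_init() below comes from.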
1583 * 1584 */ 1585 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1586 { 1587 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1588 int ret; 1589 1590 ret = gen8_init_scratch(&ppgtt->base); 1591 if (ret) 1592 return ret; 1593 1594 ppgtt->base.start = 0; 1595 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1596 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1597 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1598 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1599 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1600 ppgtt->base.bind_vma = ppgtt_bind_vma; 1601 ppgtt->debug_dump = gen8_dump_ppgtt; 1602 1603 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1604 ret = setup_px(dev_priv, &ppgtt->pml4); 1605 if (ret) 1606 goto free_scratch; 1607 1608 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1609 1610 ppgtt->base.total = 1ULL << 48; 1611 ppgtt->switch_mm = gen8_48b_mm_switch; 1612 } else { 1613 ret = __pdp_init(dev_priv, &ppgtt->pdp); 1614 if (ret) 1615 goto free_scratch; 1616 1617 ppgtt->base.total = 1ULL << 32; 1618 ppgtt->switch_mm = gen8_legacy_mm_switch; 1619 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1620 0, 0, 1621 GEN8_PML4E_SHIFT); 1622 1623 if (intel_vgpu_active(dev_priv)) { 1624 ret = gen8_preallocate_top_level_pdps(ppgtt); 1625 if (ret) 1626 goto free_scratch; 1627 } 1628 } 1629 1630 if (intel_vgpu_active(dev_priv)) 1631 gen8_ppgtt_notify_vgt(ppgtt, true); 1632 1633 return 0; 1634 1635 free_scratch: 1636 gen8_free_scratch(&ppgtt->base); 1637 return ret; 1638 } 1639 1640 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1641 { 1642 struct i915_address_space *vm = &ppgtt->base; 1643 struct i915_page_table *unused; 1644 gen6_pte_t scratch_pte; 1645 uint32_t pd_entry; 1646 uint32_t pte, pde; 1647 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1648 1649 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1650 I915_CACHE_LLC, 0); 1651 1652 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1653 u32 expected; 1654 gen6_pte_t *pt_vaddr; 1655 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1656 pd_entry = readl(ppgtt->pd_addr + pde); 1657 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1658 1659 if (pd_entry != expected) 1660 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1661 pde, 1662 pd_entry, 1663 expected); 1664 seq_printf(m, "\tPDE: %x\n", pd_entry); 1665 1666 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1667 1668 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1669 unsigned long va = 1670 (pde * PAGE_SIZE * GEN6_PTES) + 1671 (pte * PAGE_SIZE); 1672 int i; 1673 bool found = false; 1674 for (i = 0; i < 4; i++) 1675 if (pt_vaddr[pte + i] != scratch_pte) 1676 found = true; 1677 if (!found) 1678 continue; 1679 1680 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1681 for (i = 0; i < 4; i++) { 1682 if (pt_vaddr[pte + i] != scratch_pte) 1683 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1684 else 1685 seq_puts(m, " SCRATCH "); 1686 } 1687 seq_puts(m, "\n"); 1688 } 1689 kunmap_px(ppgtt, pt_vaddr); 1690 } 1691 } 1692 1693 /* Write pde (index) from the page directory @pd to the page table @pt */ 1694 static void gen6_write_pde(struct i915_page_directory *pd, 1695 const int pde, struct i915_page_table *pt) 1696 { 1697 /* Caller needs to make sure the write completes if necessary */ 1698 struct i915_hw_ppgtt *ppgtt = 1699 container_of(pd, struct i915_hw_ppgtt, pd); 1700 u32 pd_entry; 1701 1702 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1703 
pd_entry |= GEN6_PDE_VALID; 1704 1705 writel(pd_entry, ppgtt->pd_addr + pde); 1706 } 1707 1708 /* Write all the page tables found in the ppgtt structure to incrementing page 1709 * directories. */ 1710 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1711 struct i915_page_directory *pd, 1712 uint32_t start, uint32_t length) 1713 { 1714 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1715 struct i915_page_table *pt; 1716 uint32_t pde; 1717 1718 gen6_for_each_pde(pt, pd, start, length, pde) 1719 gen6_write_pde(pd, pde, pt); 1720 1721 /* Make sure write is complete before other code can use this page 1722 * table. Also require for WC mapped PTEs */ 1723 readl(ggtt->gsm); 1724 } 1725 1726 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1727 { 1728 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1729 1730 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1731 } 1732 1733 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1734 struct drm_i915_gem_request *req) 1735 { 1736 struct intel_ring *ring = req->ring; 1737 struct intel_engine_cs *engine = req->engine; 1738 int ret; 1739 1740 /* NB: TLBs must be flushed and invalidated before a switch */ 1741 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1742 if (ret) 1743 return ret; 1744 1745 ret = intel_ring_begin(req, 6); 1746 if (ret) 1747 return ret; 1748 1749 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1750 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1751 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1752 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1753 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1754 intel_ring_emit(ring, MI_NOOP); 1755 intel_ring_advance(ring); 1756 1757 return 0; 1758 } 1759 1760 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1761 struct drm_i915_gem_request *req) 1762 { 1763 struct intel_ring *ring = req->ring; 1764 struct intel_engine_cs *engine = req->engine; 1765 int ret; 1766 1767 /* NB: TLBs must be flushed and invalidated before a switch */ 1768 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1769 if (ret) 1770 return ret; 1771 1772 ret = intel_ring_begin(req, 6); 1773 if (ret) 1774 return ret; 1775 1776 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1777 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1778 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1779 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1780 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1781 intel_ring_emit(ring, MI_NOOP); 1782 intel_ring_advance(ring); 1783 1784 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1785 if (engine->id != RCS) { 1786 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1787 if (ret) 1788 return ret; 1789 } 1790 1791 return 0; 1792 } 1793 1794 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1795 struct drm_i915_gem_request *req) 1796 { 1797 struct intel_engine_cs *engine = req->engine; 1798 struct drm_i915_private *dev_priv = req->i915; 1799 1800 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1801 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1802 return 0; 1803 } 1804 1805 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) 1806 { 1807 struct intel_engine_cs *engine; 1808 enum intel_engine_id id; 1809 1810 for_each_engine(engine, dev_priv, id) { 1811 u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ? 
GEN8_GFX_PPGTT_48B : 0; 1813 I915_WRITE(RING_MODE_GEN7(engine), 1814 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1815 } 1816 } 1817 1818 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) 1819 { 1820 struct intel_engine_cs *engine; 1821 uint32_t ecochk, ecobits; 1822 enum intel_engine_id id; 1823 1824 ecobits = I915_READ(GAC_ECO_BITS); 1825 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1826 1827 ecochk = I915_READ(GAM_ECOCHK); 1828 if (IS_HASWELL(dev_priv)) { 1829 ecochk |= ECOCHK_PPGTT_WB_HSW; 1830 } else { 1831 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1832 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1833 } 1834 I915_WRITE(GAM_ECOCHK, ecochk); 1835 1836 for_each_engine(engine, dev_priv, id) { 1837 /* GFX_MODE is per-ring on gen7+ */ 1838 I915_WRITE(RING_MODE_GEN7(engine), 1839 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1840 } 1841 } 1842 1843 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) 1844 { 1845 uint32_t ecochk, gab_ctl, ecobits; 1846 1847 ecobits = I915_READ(GAC_ECO_BITS); 1848 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1849 ECOBITS_PPGTT_CACHE64B); 1850 1851 gab_ctl = I915_READ(GAB_CTL); 1852 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1853 1854 ecochk = I915_READ(GAM_ECOCHK); 1855 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1856 1857 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1858 } 1859 1860 /* PPGTT support for Sandybridge/Gen6 and later */ 1861 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1862 uint64_t start, 1863 uint64_t length) 1864 { 1865 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1866 gen6_pte_t *pt_vaddr, scratch_pte; 1867 unsigned first_entry = start >> PAGE_SHIFT; 1868 unsigned num_entries = length >> PAGE_SHIFT; 1869 unsigned act_pt = first_entry / GEN6_PTES; 1870 unsigned first_pte = first_entry % GEN6_PTES; 1871 unsigned last_pte, i; 1872 1873 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1874 I915_CACHE_LLC, 0); 1875 1876 while (num_entries) { 1877 last_pte = first_pte + num_entries; 1878 if (last_pte > GEN6_PTES) 1879 last_pte = GEN6_PTES; 1880 1881 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1882 1883 for (i = first_pte; i < last_pte; i++) 1884 pt_vaddr[i] = scratch_pte; 1885 1886 kunmap_px(ppgtt, pt_vaddr); 1887 1888 num_entries -= last_pte - first_pte; 1889 first_pte = 0; 1890 act_pt++; 1891 } 1892 } 1893 1894 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1895 struct sg_table *pages, 1896 uint64_t start, 1897 enum i915_cache_level cache_level, u32 flags) 1898 { 1899 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1900 unsigned first_entry = start >> PAGE_SHIFT; 1901 unsigned act_pt = first_entry / GEN6_PTES; 1902 unsigned act_pte = first_entry % GEN6_PTES; 1903 gen6_pte_t *pt_vaddr = NULL; 1904 struct sgt_iter sgt_iter; 1905 dma_addr_t addr; 1906 1907 for_each_sgt_dma(addr, sgt_iter, pages) { 1908 if (pt_vaddr == NULL) 1909 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1910 1911 pt_vaddr[act_pte] = 1912 vm->pte_encode(addr, cache_level, flags); 1913 1914 if (++act_pte == GEN6_PTES) { 1915 kunmap_px(ppgtt, pt_vaddr); 1916 pt_vaddr = NULL; 1917 act_pt++; 1918 act_pte = 0; 1919 } 1920 } 1921 1922 if (pt_vaddr) 1923 kunmap_px(ppgtt, pt_vaddr); 1924 } 1925 1926 static int gen6_alloc_va_range(struct i915_address_space *vm, 1927 uint64_t start_in, uint64_t length_in) 1928 { 1929 DECLARE_BITMAP(new_page_tables, I915_PDES); 1930 struct drm_i915_private *dev_priv = to_i915(vm->dev); 1931
struct i915_ggtt *ggtt = &dev_priv->ggtt; 1932 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1933 struct i915_page_table *pt; 1934 uint32_t start, length, start_save, length_save; 1935 uint32_t pde; 1936 int ret; 1937 1938 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1939 return -ENODEV; 1940 1941 start = start_save = start_in; 1942 length = length_save = length_in; 1943 1944 bitmap_zero(new_page_tables, I915_PDES); 1945 1946 /* The allocation is done in two stages so that we can bail out with 1947 * a minimal amount of pain. The first stage finds new page tables that 1948 * need allocation. The second stage marks the ptes in use within the page 1949 * tables. 1950 */ 1951 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1952 if (pt != vm->scratch_pt) { 1953 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1954 continue; 1955 } 1956 1957 /* We've already allocated a page table */ 1958 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1959 1960 pt = alloc_pt(dev_priv); 1961 if (IS_ERR(pt)) { 1962 ret = PTR_ERR(pt); 1963 goto unwind_out; 1964 } 1965 1966 gen6_initialize_pt(vm, pt); 1967 1968 ppgtt->pd.page_table[pde] = pt; 1969 __set_bit(pde, new_page_tables); 1970 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1971 } 1972 1973 start = start_save; 1974 length = length_save; 1975 1976 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1977 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1978 1979 bitmap_zero(tmp_bitmap, GEN6_PTES); 1980 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1981 gen6_pte_count(start, length)); 1982 1983 if (__test_and_clear_bit(pde, new_page_tables)) 1984 gen6_write_pde(&ppgtt->pd, pde, pt); 1985 1986 trace_i915_page_table_entry_map(vm, pde, pt, 1987 gen6_pte_index(start), 1988 gen6_pte_count(start, length), 1989 GEN6_PTES); 1990 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1991 GEN6_PTES); 1992 } 1993 1994 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1995 1996 /* Make sure write is complete before other code can use this page 1997 * table.
Also required for WC mapped PTEs */ 1998 readl(ggtt->gsm); 1999 2000 mark_tlbs_dirty(ppgtt); 2001 return 0; 2002 2003 unwind_out: 2004 for_each_set_bit(pde, new_page_tables, I915_PDES) { 2005 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 2006 2007 ppgtt->pd.page_table[pde] = vm->scratch_pt; 2008 free_pt(dev_priv, pt); 2009 } 2010 2011 mark_tlbs_dirty(ppgtt); 2012 return ret; 2013 } 2014 2015 static int gen6_init_scratch(struct i915_address_space *vm) 2016 { 2017 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2018 int ret; 2019 2020 ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); 2021 if (ret) 2022 return ret; 2023 2024 vm->scratch_pt = alloc_pt(dev_priv); 2025 if (IS_ERR(vm->scratch_pt)) { 2026 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2027 return PTR_ERR(vm->scratch_pt); 2028 } 2029 2030 gen6_initialize_pt(vm, vm->scratch_pt); 2031 2032 return 0; 2033 } 2034 2035 static void gen6_free_scratch(struct i915_address_space *vm) 2036 { 2037 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2038 2039 free_pt(dev_priv, vm->scratch_pt); 2040 cleanup_scratch_page(dev_priv, &vm->scratch_page); 2041 } 2042 2043 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 2044 { 2045 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 2046 struct i915_page_directory *pd = &ppgtt->pd; 2047 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2048 struct i915_page_table *pt; 2049 uint32_t pde; 2050 2051 drm_mm_remove_node(&ppgtt->node); 2052 2053 gen6_for_all_pdes(pt, pd, pde) 2054 if (pt != vm->scratch_pt) 2055 free_pt(dev_priv, pt); 2056 2057 gen6_free_scratch(vm); 2058 } 2059 2060 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 2061 { 2062 struct i915_address_space *vm = &ppgtt->base; 2063 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 2064 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2065 bool retried = false; 2066 int ret; 2067 2068 /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The 2069 * allocator works in address space sizes, so it's multiplied by page 2070 * size. We allocate at the top of the GTT to avoid fragmentation.
2071 */ 2072 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2073 2074 ret = gen6_init_scratch(vm); 2075 if (ret) 2076 return ret; 2077 2078 alloc: 2079 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2080 &ppgtt->node, GEN6_PD_SIZE, 2081 GEN6_PD_ALIGN, 0, 2082 0, ggtt->base.total, 2083 DRM_MM_TOPDOWN); 2084 if (ret == -ENOSPC && !retried) { 2085 ret = i915_gem_evict_something(&ggtt->base, 2086 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2087 I915_CACHE_NONE, 2088 0, ggtt->base.total, 2089 0); 2090 if (ret) 2091 goto err_out; 2092 2093 retried = true; 2094 goto alloc; 2095 } 2096 2097 if (ret) 2098 goto err_out; 2099 2100 2101 if (ppgtt->node.start < ggtt->mappable_end) 2102 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2103 2104 return 0; 2105 2106 err_out: 2107 gen6_free_scratch(vm); 2108 return ret; 2109 } 2110 2111 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2112 { 2113 return gen6_ppgtt_allocate_page_directories(ppgtt); 2114 } 2115 2116 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2117 uint64_t start, uint64_t length) 2118 { 2119 struct i915_page_table *unused; 2120 uint32_t pde; 2121 2122 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2123 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2124 } 2125 2126 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2127 { 2128 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 2129 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2130 int ret; 2131 2132 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2133 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 2134 ppgtt->switch_mm = gen6_mm_switch; 2135 else if (IS_HASWELL(dev_priv)) 2136 ppgtt->switch_mm = hsw_mm_switch; 2137 else if (IS_GEN7(dev_priv)) 2138 ppgtt->switch_mm = gen7_mm_switch; 2139 else 2140 BUG(); 2141 2142 ret = gen6_ppgtt_alloc(ppgtt); 2143 if (ret) 2144 return ret; 2145 2146 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2147 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2148 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2149 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2150 ppgtt->base.bind_vma = ppgtt_bind_vma; 2151 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2152 ppgtt->base.start = 0; 2153 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2154 ppgtt->debug_dump = gen6_dump_ppgtt; 2155 2156 ppgtt->pd.base.ggtt_offset = 2157 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2158 2159 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2160 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2161 2162 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2163 2164 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2165 2166 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2167 ppgtt->node.size >> 20, 2168 ppgtt->node.start / PAGE_SIZE); 2169 2170 DRM_DEBUG("Adding PPGTT at offset %x\n", 2171 ppgtt->pd.base.ggtt_offset << 10); 2172 2173 return 0; 2174 } 2175 2176 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2177 struct drm_i915_private *dev_priv) 2178 { 2179 ppgtt->base.dev = &dev_priv->drm; 2180 2181 if (INTEL_INFO(dev_priv)->gen < 8) 2182 return gen6_ppgtt_init(ppgtt); 2183 else 2184 return gen8_ppgtt_init(ppgtt); 2185 } 2186 2187 static void i915_address_space_init(struct i915_address_space *vm, 2188 struct drm_i915_private *dev_priv, 2189 const char *name) 2190 { 2191 i915_gem_timeline_init(dev_priv, &vm->timeline, name); 2192 drm_mm_init(&vm->mm, vm->start, vm->total); 2193 INIT_LIST_HEAD(&vm->active_list); 2194 INIT_LIST_HEAD(&vm->inactive_list); 2195 
INIT_LIST_HEAD(&vm->unbound_list); 2196 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2197 } 2198 2199 static void i915_address_space_fini(struct i915_address_space *vm) 2200 { 2201 i915_gem_timeline_fini(&vm->timeline); 2202 drm_mm_takedown(&vm->mm); 2203 list_del(&vm->global_link); 2204 } 2205 2206 static void gtt_write_workarounds(struct drm_i915_private *dev_priv) 2207 { 2208 /* This function is for GTT-related workarounds. It is called on 2209 * driver load and after a GPU reset, so workarounds can be placed 2210 * here even if a GPU reset would otherwise overwrite them. 2211 */ 2212 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2213 if (IS_BROADWELL(dev_priv)) 2214 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2215 else if (IS_CHERRYVIEW(dev_priv)) 2216 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2217 else if (IS_SKYLAKE(dev_priv)) 2218 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2219 else if (IS_BROXTON(dev_priv)) 2220 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2221 } 2222 2223 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2224 struct drm_i915_private *dev_priv, 2225 struct drm_i915_file_private *file_priv, 2226 const char *name) 2227 { 2228 int ret; 2229 2230 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2231 if (ret == 0) { 2232 kref_init(&ppgtt->ref); 2233 i915_address_space_init(&ppgtt->base, dev_priv, name); 2234 ppgtt->base.file = file_priv; 2235 } 2236 2237 return ret; 2238 } 2239 2240 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) 2241 { 2242 gtt_write_workarounds(dev_priv); 2243 2244 /* In the case of execlists, PPGTT is enabled by the context descriptor 2245 * and the PDPs are contained within the context itself. We don't 2246 * need to do anything here. */ 2247 if (i915.enable_execlists) 2248 return 0; 2249 2250 if (!USES_PPGTT(dev_priv)) 2251 return 0; 2252 2253 if (IS_GEN6(dev_priv)) 2254 gen6_ppgtt_enable(dev_priv); 2255 else if (IS_GEN7(dev_priv)) 2256 gen7_ppgtt_enable(dev_priv); 2257 else if (INTEL_GEN(dev_priv) >= 8) 2258 gen8_ppgtt_enable(dev_priv); 2259 else 2260 MISSING_CASE(INTEL_GEN(dev_priv)); 2261 2262 return 0; 2263 } 2264 2265 struct i915_hw_ppgtt * 2266 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2267 struct drm_i915_file_private *fpriv, 2268 const char *name) 2269 { 2270 struct i915_hw_ppgtt *ppgtt; 2271 int ret; 2272 2273 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2274 if (!ppgtt) 2275 return ERR_PTR(-ENOMEM); 2276 2277 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); 2278 if (ret) { 2279 kfree(ppgtt); 2280 return ERR_PTR(ret); 2281 } 2282 2283 trace_i915_ppgtt_create(&ppgtt->base); 2284 2285 return ppgtt; 2286 } 2287 2288 void i915_ppgtt_release(struct kref *kref) 2289 { 2290 struct i915_hw_ppgtt *ppgtt = 2291 container_of(kref, struct i915_hw_ppgtt, ref); 2292 2293 trace_i915_ppgtt_release(&ppgtt->base); 2294 2295 /* vmas should already be unbound and destroyed */ 2296 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2297 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2298 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2299 2300 i915_address_space_fini(&ppgtt->base); 2301 2302 ppgtt->base.cleanup(&ppgtt->base); 2303 kfree(ppgtt); 2304 } 2305 2306 /* Certain Gen5 chipsets require idling the GPU before 2307 * unmapping anything from the GTT when VT-d is enabled.
2308 */ 2309 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2310 { 2311 #ifdef CONFIG_INTEL_IOMMU 2312 /* Query intel_iommu to see if we need the workaround. Presumably that 2313 * was loaded first. 2314 */ 2315 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2316 return true; 2317 #endif 2318 return false; 2319 } 2320 2321 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2322 { 2323 struct intel_engine_cs *engine; 2324 enum intel_engine_id id; 2325 2326 if (INTEL_INFO(dev_priv)->gen < 6) 2327 return; 2328 2329 for_each_engine(engine, dev_priv, id) { 2330 u32 fault_reg; 2331 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2332 if (fault_reg & RING_FAULT_VALID) { 2333 DRM_DEBUG_DRIVER("Unexpected fault\n" 2334 "\tAddr: 0x%08lx\n" 2335 "\tAddress space: %s\n" 2336 "\tSource ID: %d\n" 2337 "\tType: %d\n", 2338 fault_reg & PAGE_MASK, 2339 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2340 RING_FAULT_SRCID(fault_reg), 2341 RING_FAULT_FAULT_TYPE(fault_reg)); 2342 I915_WRITE(RING_FAULT_REG(engine), 2343 fault_reg & ~RING_FAULT_VALID); 2344 } 2345 } 2346 2347 /* Engine specific init may not have been done till this point. */ 2348 if (dev_priv->engine[RCS]) 2349 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2350 } 2351 2352 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2353 { 2354 if (INTEL_INFO(dev_priv)->gen < 6) { 2355 intel_gtt_chipset_flush(); 2356 } else { 2357 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2358 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2359 } 2360 } 2361 2362 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) 2363 { 2364 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2365 2366 /* Don't bother messing with faults pre GEN6 as we have little 2367 * documentation supporting that it's a good idea. 
2368 */ 2369 if (INTEL_GEN(dev_priv) < 6) 2370 return; 2371 2372 i915_check_and_clear_faults(dev_priv); 2373 2374 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 2375 2376 i915_ggtt_flush(dev_priv); 2377 } 2378 2379 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2380 struct sg_table *pages) 2381 { 2382 if (dma_map_sg(&obj->base.dev->pdev->dev, 2383 pages->sgl, pages->nents, 2384 PCI_DMA_BIDIRECTIONAL)) 2385 return 0; 2386 2387 return -ENOSPC; 2388 } 2389 2390 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2391 { 2392 writeq(pte, addr); 2393 } 2394 2395 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2396 dma_addr_t addr, 2397 uint64_t offset, 2398 enum i915_cache_level level, 2399 u32 unused) 2400 { 2401 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2402 gen8_pte_t __iomem *pte = 2403 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2404 (offset >> PAGE_SHIFT); 2405 2406 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2407 2408 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2409 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2410 } 2411 2412 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2413 struct sg_table *st, 2414 uint64_t start, 2415 enum i915_cache_level level, u32 unused) 2416 { 2417 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2418 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2419 struct sgt_iter sgt_iter; 2420 gen8_pte_t __iomem *gtt_entries; 2421 gen8_pte_t gtt_entry; 2422 dma_addr_t addr; 2423 int i = 0; 2424 2425 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2426 2427 for_each_sgt_dma(addr, sgt_iter, st) { 2428 gtt_entry = gen8_pte_encode(addr, level); 2429 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2430 } 2431 2432 /* 2433 * XXX: This serves as a posting read to make sure that the PTE has 2434 * actually been updated. There is some concern that even though 2435 * registers and PTEs are within the same BAR, they are potentially 2436 * subject to NUMA access patterns. Therefore, even with the way we 2437 * assume hardware should work, we must keep this posting read for paranoia. 2438 */ 2439 if (i != 0) 2440 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2441 2442 /* This next bit makes the above posting read even more important. We 2443 * want to flush the TLBs only after we're certain all the PTE updates 2444 * have finished.
2445 */ 2446 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2447 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2448 } 2449 2450 struct insert_entries { 2451 struct i915_address_space *vm; 2452 struct sg_table *st; 2453 uint64_t start; 2454 enum i915_cache_level level; 2455 u32 flags; 2456 }; 2457 2458 static int gen8_ggtt_insert_entries__cb(void *_arg) 2459 { 2460 struct insert_entries *arg = _arg; 2461 gen8_ggtt_insert_entries(arg->vm, arg->st, 2462 arg->start, arg->level, arg->flags); 2463 return 0; 2464 } 2465 2466 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2467 struct sg_table *st, 2468 uint64_t start, 2469 enum i915_cache_level level, 2470 u32 flags) 2471 { 2472 struct insert_entries arg = { vm, st, start, level, flags }; 2473 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2474 } 2475 2476 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2477 dma_addr_t addr, 2478 uint64_t offset, 2479 enum i915_cache_level level, 2480 u32 flags) 2481 { 2482 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2483 gen6_pte_t __iomem *pte = 2484 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2485 (offset >> PAGE_SHIFT); 2486 2487 iowrite32(vm->pte_encode(addr, level, flags), pte); 2488 2489 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2490 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2491 } 2492 2493 /* 2494 * Binds an object into the global gtt with the specified cache level. The object 2495 * will be accessible to the GPU via commands whose operands reference offsets 2496 * within the global GTT as well as accessible by the GPU through the GMADR 2497 * mapped BAR (dev_priv->mm.gtt->gtt). 2498 */ 2499 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2500 struct sg_table *st, 2501 uint64_t start, 2502 enum i915_cache_level level, u32 flags) 2503 { 2504 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2505 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2506 struct sgt_iter sgt_iter; 2507 gen6_pte_t __iomem *gtt_entries; 2508 gen6_pte_t gtt_entry; 2509 dma_addr_t addr; 2510 int i = 0; 2511 2512 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2513 2514 for_each_sgt_dma(addr, sgt_iter, st) { 2515 gtt_entry = vm->pte_encode(addr, level, flags); 2516 iowrite32(gtt_entry, &gtt_entries[i++]); 2517 } 2518 2519 /* XXX: This serves as a posting read to make sure that the PTE has 2520 * actually been updated. There is some concern that even though 2521 * registers and PTEs are within the same BAR, they are potentially 2522 * subject to NUMA access patterns. Therefore, even with the way we 2523 * assume hardware should work, we must keep this posting read for paranoia. 2524 */ 2525 if (i != 0) 2526 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2527 2528 /* This next bit makes the above posting read even more important. We 2529 * want to flush the TLBs only after we're certain all the PTE updates 2530 * have finished.
2531 */ 2532 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2533 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2534 } 2535 2536 static void nop_clear_range(struct i915_address_space *vm, 2537 uint64_t start, uint64_t length) 2538 { 2539 } 2540 2541 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2542 uint64_t start, uint64_t length) 2543 { 2544 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2545 unsigned first_entry = start >> PAGE_SHIFT; 2546 unsigned num_entries = length >> PAGE_SHIFT; 2547 gen8_pte_t scratch_pte, __iomem *gtt_base = 2548 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2549 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2550 int i; 2551 2552 if (WARN(num_entries > max_entries, 2553 "First entry = %d; Num entries = %d (max=%d)\n", 2554 first_entry, num_entries, max_entries)) 2555 num_entries = max_entries; 2556 2557 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 2558 I915_CACHE_LLC); 2559 for (i = 0; i < num_entries; i++) 2560 gen8_set_pte(&gtt_base[i], scratch_pte); 2561 readl(gtt_base); 2562 } 2563 2564 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2565 uint64_t start, 2566 uint64_t length) 2567 { 2568 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2569 unsigned first_entry = start >> PAGE_SHIFT; 2570 unsigned num_entries = length >> PAGE_SHIFT; 2571 gen6_pte_t scratch_pte, __iomem *gtt_base = 2572 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2573 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2574 int i; 2575 2576 if (WARN(num_entries > max_entries, 2577 "First entry = %d; Num entries = %d (max=%d)\n", 2578 first_entry, num_entries, max_entries)) 2579 num_entries = max_entries; 2580 2581 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2582 I915_CACHE_LLC, 0); 2583 2584 for (i = 0; i < num_entries; i++) 2585 iowrite32(scratch_pte, &gtt_base[i]); 2586 readl(gtt_base); 2587 } 2588 2589 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2590 dma_addr_t addr, 2591 uint64_t offset, 2592 enum i915_cache_level cache_level, 2593 u32 unused) 2594 { 2595 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2596 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2597 2598 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2599 } 2600 2601 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2602 struct sg_table *pages, 2603 uint64_t start, 2604 enum i915_cache_level cache_level, u32 unused) 2605 { 2606 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2607 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2608 2609 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2610 2611 } 2612 2613 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2614 uint64_t start, 2615 uint64_t length) 2616 { 2617 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2618 } 2619 2620 static int ggtt_bind_vma(struct i915_vma *vma, 2621 enum i915_cache_level cache_level, 2622 u32 flags) 2623 { 2624 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2625 struct drm_i915_gem_object *obj = vma->obj; 2626 u32 pte_flags = 0; 2627 int ret; 2628 2629 ret = i915_get_ggtt_vma_pages(vma); 2630 if (ret) 2631 return ret; 2632 2633 /* Currently applicable only to VLV */ 2634 if (obj->gt_ro) 2635 pte_flags |= PTE_READ_ONLY; 2636 2637 intel_runtime_pm_get(i915); 2638 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2639 cache_level, pte_flags); 2640 intel_runtime_pm_put(i915); 2641 2642 /* 2643 * Without aliasing PPGTT there's no difference between 2644 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2645 * upgrade to both bound if we bind either to avoid double-binding. 2646 */ 2647 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2648 2649 return 0; 2650 } 2651 2652 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2653 enum i915_cache_level cache_level, 2654 u32 flags) 2655 { 2656 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2657 u32 pte_flags; 2658 int ret; 2659 2660 ret = i915_get_ggtt_vma_pages(vma); 2661 if (ret) 2662 return ret; 2663 2664 /* Currently applicable only to VLV */ 2665 pte_flags = 0; 2666 if (vma->obj->gt_ro) 2667 pte_flags |= PTE_READ_ONLY; 2668 2669 2670 if (flags & I915_VMA_GLOBAL_BIND) { 2671 intel_runtime_pm_get(i915); 2672 vma->vm->insert_entries(vma->vm, 2673 vma->pages, vma->node.start, 2674 cache_level, pte_flags); 2675 intel_runtime_pm_put(i915); 2676 } 2677 2678 if (flags & I915_VMA_LOCAL_BIND) { 2679 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2680 appgtt->base.insert_entries(&appgtt->base, 2681 vma->pages, vma->node.start, 2682 cache_level, pte_flags); 2683 } 2684 2685 return 0; 2686 } 2687 2688 static void ggtt_unbind_vma(struct i915_vma *vma) 2689 { 2690 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2691 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2692 const u64 size = min(vma->size, vma->node.size); 2693 2694 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2695 intel_runtime_pm_get(i915); 2696 vma->vm->clear_range(vma->vm, 2697 vma->node.start, size); 2698 intel_runtime_pm_put(i915); 2699 } 2700 2701 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2702 appgtt->base.clear_range(&appgtt->base, 2703 vma->node.start, size); 2704 } 2705 2706 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2707 struct sg_table *pages) 2708 { 2709 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2710 struct device *kdev = &dev_priv->drm.pdev->dev; 2711 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2712 2713 if (unlikely(ggtt->do_idle_maps)) { 2714 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2715 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2716 /* Wait a bit, in hopes it avoids the hang */ 2717 udelay(10); 2718 } 2719 } 2720 2721 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2722 } 2723 2724 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2725 unsigned long color, 2726 u64 *start, 2727 u64 *end) 2728 { 2729 if (node->color != color) 2730 *start += 4096; 2731 2732 node 
= list_first_entry_or_null(&node->node_list, 2733 struct drm_mm_node, 2734 node_list); 2735 if (node && node->allocated && node->color != color) 2736 *end -= 4096; 2737 } 2738 2739 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2740 { 2741 /* Let GEM Manage all of the aperture. 2742 * 2743 * However, leave one page at the end still bound to the scratch page. 2744 * There are a number of places where the hardware apparently prefetches 2745 * past the end of the object, and we've seen multiple hangs with the 2746 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2747 * aperture. One page should be enough to keep any prefetching inside 2748 * of the aperture. 2749 */ 2750 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2751 unsigned long hole_start, hole_end; 2752 struct i915_hw_ppgtt *ppgtt; 2753 struct drm_mm_node *entry; 2754 int ret; 2755 2756 ret = intel_vgt_balloon(dev_priv); 2757 if (ret) 2758 return ret; 2759 2760 /* Reserve a mappable slot for our lockless error capture */ 2761 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2762 &ggtt->error_capture, 2763 4096, 0, -1, 2764 0, ggtt->mappable_end, 2765 0, 0); 2766 if (ret) 2767 return ret; 2768 2769 /* Clear any non-preallocated blocks */ 2770 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2771 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2772 hole_start, hole_end); 2773 ggtt->base.clear_range(&ggtt->base, hole_start, 2774 hole_end - hole_start); 2775 } 2776 2777 /* And finally clear the reserved guard page */ 2778 ggtt->base.clear_range(&ggtt->base, 2779 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2780 2781 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2782 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2783 if (!ppgtt) { 2784 ret = -ENOMEM; 2785 goto err; 2786 } 2787 2788 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2789 if (ret) 2790 goto err_ppgtt; 2791 2792 if (ppgtt->base.allocate_va_range) { 2793 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2794 ppgtt->base.total); 2795 if (ret) 2796 goto err_ppgtt_cleanup; 2797 } 2798 2799 ppgtt->base.clear_range(&ppgtt->base, 2800 ppgtt->base.start, 2801 ppgtt->base.total); 2802 2803 dev_priv->mm.aliasing_ppgtt = ppgtt; 2804 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2805 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2806 } 2807 2808 return 0; 2809 2810 err_ppgtt_cleanup: 2811 ppgtt->base.cleanup(&ppgtt->base); 2812 err_ppgtt: 2813 kfree(ppgtt); 2814 err: 2815 drm_mm_remove_node(&ggtt->error_capture); 2816 return ret; 2817 } 2818 2819 /** 2820 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2821 * @dev_priv: i915 device 2822 */ 2823 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2824 { 2825 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2826 2827 if (dev_priv->mm.aliasing_ppgtt) { 2828 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2829 ppgtt->base.cleanup(&ppgtt->base); 2830 kfree(ppgtt); 2831 } 2832 2833 i915_gem_cleanup_stolen(&dev_priv->drm); 2834 2835 if (drm_mm_node_allocated(&ggtt->error_capture)) 2836 drm_mm_remove_node(&ggtt->error_capture); 2837 2838 if (drm_mm_initialized(&ggtt->base.mm)) { 2839 intel_vgt_deballoon(dev_priv); 2840 2841 mutex_lock(&dev_priv->drm.struct_mutex); 2842 i915_address_space_fini(&ggtt->base); 2843 mutex_unlock(&dev_priv->drm.struct_mutex); 2844 } 2845 2846 ggtt->base.cleanup(&ggtt->base); 2847 2848 arch_phys_wc_del(ggtt->mtrr); 2849 io_mapping_fini(&ggtt->mappable); 2850 } 2851 2852 static unsigned int gen6_get_total_gtt_size(u16 
snb_gmch_ctl) 2853 { 2854 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2855 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2856 return snb_gmch_ctl << 20; 2857 } 2858 2859 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2860 { 2861 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2862 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2863 if (bdw_gmch_ctl) 2864 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2865 2866 #ifdef CONFIG_X86_32 2867 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2868 if (bdw_gmch_ctl > 4) 2869 bdw_gmch_ctl = 4; 2870 #endif 2871 2872 return bdw_gmch_ctl << 20; 2873 } 2874 2875 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2876 { 2877 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2878 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2879 2880 if (gmch_ctrl) 2881 return 1 << (20 + gmch_ctrl); 2882 2883 return 0; 2884 } 2885 2886 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2887 { 2888 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2889 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2890 return snb_gmch_ctl << 25; /* 32 MB units */ 2891 } 2892 2893 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2894 { 2895 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2896 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2897 return bdw_gmch_ctl << 25; /* 32 MB units */ 2898 } 2899 2900 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2901 { 2902 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2903 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2904 2905 /* 2906 * 0x0 to 0x10: 32MB increments starting at 0MB 2907 * 0x11 to 0x16: 4MB increments starting at 8MB 2908 * 0x17 to 0x1d: 4MB increments start at 36MB 2909 */ 2910 if (gmch_ctrl < 0x11) 2911 return gmch_ctrl << 25; 2912 else if (gmch_ctrl < 0x17) 2913 return (gmch_ctrl - 0x11 + 2) << 22; 2914 else 2915 return (gmch_ctrl - 0x17 + 9) << 22; 2916 } 2917 2918 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2919 { 2920 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2921 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2922 2923 if (gen9_gmch_ctl < 0xf0) 2924 return gen9_gmch_ctl << 25; /* 32 MB units */ 2925 else 2926 /* 4MB increments starting at 0xf0 for 4MB */ 2927 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2928 } 2929 2930 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2931 { 2932 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 2933 struct pci_dev *pdev = ggtt->base.dev->pdev; 2934 phys_addr_t phys_addr; 2935 int ret; 2936 2937 /* For Modern GENs the PTEs and register space are split in the BAR */ 2938 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2939 2940 /* 2941 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2942 * dropped. For WC mappings in general we have 64 byte burst writes 2943 * when the WC buffer is flushed, so we can't use it, but have to 2944 * resort to an uncached mapping. The WC issue is easily caught by the 2945 * readback check when writing GTT PTE entries. 2946 */ 2947 if (IS_BROXTON(dev_priv)) 2948 ggtt->gsm = ioremap_nocache(phys_addr, size); 2949 else 2950 ggtt->gsm = ioremap_wc(phys_addr, size); 2951 if (!ggtt->gsm) { 2952 DRM_ERROR("Failed to map the ggtt page table\n"); 2953 return -ENOMEM; 2954 } 2955 2956 ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); 2957 if (ret) { 2958 DRM_ERROR("Scratch setup failed\n"); 2959 /* iounmap will also get called at remove, but meh */ 2960 iounmap(ggtt->gsm); 2961 return ret; 2962 } 2963 2964 return 0; 2965 } 2966 2967 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2968 * bits. 
When using advanced contexts each context stores its own PAT, but 2969 * writing this data shouldn't be harmful even in those cases. */ 2970 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2971 { 2972 uint64_t pat; 2973 2974 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2975 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2976 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2977 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2978 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2979 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2980 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2981 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2982 2983 if (!USES_PPGTT(dev_priv)) 2984 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2985 * so RTL will always use the value corresponding to 2986 * pat_sel = 000". 2987 * So let's disable cache for GGTT to avoid screen corruptions. 2988 * MOCS still can be used though. 2989 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2990 * before this patch, i.e. the same uncached + snooping access 2991 * like on gen6/7 seems to be in effect. 2992 * - So this just fixes blitter/render access. Again it looks 2993 * like it's not just uncached access, but uncached + snooping. 2994 * So we can still hold onto all our assumptions wrt cpu 2995 * clflushing on LLC machines. 2996 */ 2997 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2998 2999 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3000 * write would work. */ 3001 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3002 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3003 } 3004 3005 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3006 { 3007 uint64_t pat; 3008 3009 /* 3010 * Map WB on BDW to snooped on CHV. 3011 * 3012 * Only the snoop bit has meaning for CHV, the rest is 3013 * ignored. 3014 * 3015 * The hardware will never snoop for certain types of accesses: 3016 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3017 * - PPGTT page tables 3018 * - some other special cycles 3019 * 3020 * As with BDW, we also need to consider the following for GT accesses: 3021 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3022 * so RTL will always use the value corresponding to 3023 * pat_sel = 000". 3024 * Which means we must set the snoop bit in PAT entry 0 3025 * in order to keep the global status page working. 
3026 */ 3027 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3028 GEN8_PPAT(1, 0) | 3029 GEN8_PPAT(2, 0) | 3030 GEN8_PPAT(3, 0) | 3031 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3032 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3033 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3034 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3035 3036 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3037 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3038 } 3039 3040 static void gen6_gmch_remove(struct i915_address_space *vm) 3041 { 3042 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3043 3044 iounmap(ggtt->gsm); 3045 cleanup_scratch_page(to_i915(vm->dev), &vm->scratch_page); 3046 } 3047 3048 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3049 { 3050 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3051 struct pci_dev *pdev = dev_priv->drm.pdev; 3052 unsigned int size; 3053 u16 snb_gmch_ctl; 3054 3055 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3056 ggtt->mappable_base = pci_resource_start(pdev, 2); 3057 ggtt->mappable_end = pci_resource_len(pdev, 2); 3058 3059 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3060 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3061 3062 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3063 3064 if (INTEL_GEN(dev_priv) >= 9) { 3065 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3066 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3067 } else if (IS_CHERRYVIEW(dev_priv)) { 3068 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3069 size = chv_get_total_gtt_size(snb_gmch_ctl); 3070 } else { 3071 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3072 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3073 } 3074 3075 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3076 3077 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3078 chv_setup_private_ppat(dev_priv); 3079 else 3080 bdw_setup_private_ppat(dev_priv); 3081 3082 ggtt->base.cleanup = gen6_gmch_remove; 3083 ggtt->base.bind_vma = ggtt_bind_vma; 3084 ggtt->base.unbind_vma = ggtt_unbind_vma; 3085 ggtt->base.insert_page = gen8_ggtt_insert_page; 3086 ggtt->base.clear_range = nop_clear_range; 3087 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3088 ggtt->base.clear_range = gen8_ggtt_clear_range; 3089 3090 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3091 if (IS_CHERRYVIEW(dev_priv)) 3092 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3093 3094 return ggtt_probe_common(ggtt, size); 3095 } 3096 3097 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3098 { 3099 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3100 struct pci_dev *pdev = dev_priv->drm.pdev; 3101 unsigned int size; 3102 u16 snb_gmch_ctl; 3103 3104 ggtt->mappable_base = pci_resource_start(pdev, 2); 3105 ggtt->mappable_end = pci_resource_len(pdev, 2); 3106 3107 /* 64/512MB is the current min/max we actually know of, but this is just 3108 * a coarse sanity check. 
3109 */ 3110 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3111 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3112 return -ENXIO; 3113 } 3114 3115 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3116 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3117 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3118 3119 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3120 3121 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3122 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3123 3124 ggtt->base.clear_range = gen6_ggtt_clear_range; 3125 ggtt->base.insert_page = gen6_ggtt_insert_page; 3126 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3127 ggtt->base.bind_vma = ggtt_bind_vma; 3128 ggtt->base.unbind_vma = ggtt_unbind_vma; 3129 ggtt->base.cleanup = gen6_gmch_remove; 3130 3131 if (HAS_EDRAM(dev_priv)) 3132 ggtt->base.pte_encode = iris_pte_encode; 3133 else if (IS_HASWELL(dev_priv)) 3134 ggtt->base.pte_encode = hsw_pte_encode; 3135 else if (IS_VALLEYVIEW(dev_priv)) 3136 ggtt->base.pte_encode = byt_pte_encode; 3137 else if (INTEL_GEN(dev_priv) >= 7) 3138 ggtt->base.pte_encode = ivb_pte_encode; 3139 else 3140 ggtt->base.pte_encode = snb_pte_encode; 3141 3142 return ggtt_probe_common(ggtt, size); 3143 } 3144 3145 static void i915_gmch_remove(struct i915_address_space *vm) 3146 { 3147 intel_gmch_remove(); 3148 } 3149 3150 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3151 { 3152 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3153 int ret; 3154 3155 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3156 if (!ret) { 3157 DRM_ERROR("failed to set up gmch\n"); 3158 return -EIO; 3159 } 3160 3161 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3162 &ggtt->mappable_base, &ggtt->mappable_end); 3163 3164 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3165 ggtt->base.insert_page = i915_ggtt_insert_page; 3166 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3167 ggtt->base.clear_range = i915_ggtt_clear_range; 3168 ggtt->base.bind_vma = ggtt_bind_vma; 3169 ggtt->base.unbind_vma = ggtt_unbind_vma; 3170 ggtt->base.cleanup = i915_gmch_remove; 3171 3172 if (unlikely(ggtt->do_idle_maps)) 3173 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3174 3175 return 0; 3176 } 3177 3178 /** 3179 * i915_ggtt_probe_hw - Probe GGTT hardware location 3180 * @dev_priv: i915 device 3181 */ 3182 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3183 { 3184 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3185 int ret; 3186 3187 ggtt->base.dev = &dev_priv->drm; 3188 3189 if (INTEL_GEN(dev_priv) <= 5) 3190 ret = i915_gmch_probe(ggtt); 3191 else if (INTEL_GEN(dev_priv) < 8) 3192 ret = gen6_gmch_probe(ggtt); 3193 else 3194 ret = gen8_gmch_probe(ggtt); 3195 if (ret) 3196 return ret; 3197 3198 if ((ggtt->base.total - 1) >> 32) { 3199 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3200 " of address space! Found %lldM!\n", 3201 ggtt->base.total >> 20); 3202 ggtt->base.total = 1ULL << 32; 3203 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3204 } 3205 3206 if (ggtt->mappable_end > ggtt->base.total) { 3207 DRM_ERROR("mappable aperture extends past end of GGTT," 3208 " aperture=%llx, total=%llx\n", 3209 ggtt->mappable_end, ggtt->base.total); 3210 ggtt->mappable_end = ggtt->base.total; 3211 } 3212 3213 /* GMADR is the PCI mmio aperture into the global GTT. 
*/ 3214 DRM_INFO("Memory usable by graphics device = %lluM\n", 3215 ggtt->base.total >> 20); 3216 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3217 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3218 #ifdef CONFIG_INTEL_IOMMU 3219 if (intel_iommu_gfx_mapped) 3220 DRM_INFO("VT-d active for gfx access\n"); 3221 #endif 3222 3223 return 0; 3224 } 3225 3226 /** 3227 * i915_ggtt_init_hw - Initialize GGTT hardware 3228 * @dev_priv: i915 device 3229 */ 3230 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3231 { 3232 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3233 int ret; 3234 3235 INIT_LIST_HEAD(&dev_priv->vm_list); 3236 3237 /* Subtract the guard page before address space initialization to 3238 * shrink the range used by drm_mm. 3239 */ 3240 mutex_lock(&dev_priv->drm.struct_mutex); 3241 ggtt->base.total -= PAGE_SIZE; 3242 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 3243 ggtt->base.total += PAGE_SIZE; 3244 if (!HAS_LLC(dev_priv)) 3245 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3246 mutex_unlock(&dev_priv->drm.struct_mutex); 3247 3248 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3249 dev_priv->ggtt.mappable_base, 3250 dev_priv->ggtt.mappable_end)) { 3251 ret = -EIO; 3252 goto out_gtt_cleanup; 3253 } 3254 3255 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3256 3257 /* 3258 * Initialise stolen early so that we may reserve preallocated 3259 * objects for the BIOS to KMS transition. 3260 */ 3261 ret = i915_gem_init_stolen(dev_priv); 3262 if (ret) 3263 goto out_gtt_cleanup; 3264 3265 return 0; 3266 3267 out_gtt_cleanup: 3268 ggtt->base.cleanup(&ggtt->base); 3269 return ret; 3270 } 3271 3272 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3273 { 3274 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3275 return -EIO; 3276 3277 return 0; 3278 } 3279 3280 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) 3281 { 3282 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3283 struct drm_i915_gem_object *obj, *on; 3284 3285 i915_check_and_clear_faults(dev_priv); 3286 3287 /* First fill our portion of the GTT with scratch pages */ 3288 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 3289 3290 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3291 3292 /* clflush objects bound into the GGTT and rebind them. 
*/ 3293 list_for_each_entry_safe(obj, on, 3294 &dev_priv->mm.bound_list, global_link) { 3295 bool ggtt_bound = false; 3296 struct i915_vma *vma; 3297 3298 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3299 if (vma->vm != &ggtt->base) 3300 continue; 3301 3302 if (!i915_vma_unbind(vma)) 3303 continue; 3304 3305 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3306 PIN_UPDATE)); 3307 ggtt_bound = true; 3308 } 3309 3310 if (ggtt_bound) 3311 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3312 } 3313 3314 ggtt->base.closed = false; 3315 3316 if (INTEL_GEN(dev_priv) >= 8) { 3317 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3318 chv_setup_private_ppat(dev_priv); 3319 else 3320 bdw_setup_private_ppat(dev_priv); 3321 3322 return; 3323 } 3324 3325 if (USES_PPGTT(dev_priv)) { 3326 struct i915_address_space *vm; 3327 3328 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3329 /* TODO: Perhaps it shouldn't be gen6 specific */ 3330 3331 struct i915_hw_ppgtt *ppgtt; 3332 3333 if (i915_is_ggtt(vm)) 3334 ppgtt = dev_priv->mm.aliasing_ppgtt; 3335 else 3336 ppgtt = i915_vm_to_ppgtt(vm); 3337 3338 gen6_write_page_range(dev_priv, &ppgtt->pd, 3339 0, ppgtt->base.total); 3340 } 3341 } 3342 3343 i915_ggtt_flush(dev_priv); 3344 } 3345 3346 struct i915_vma * 3347 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3348 struct i915_address_space *vm, 3349 const struct i915_ggtt_view *view) 3350 { 3351 struct rb_node *rb; 3352 3353 rb = obj->vma_tree.rb_node; 3354 while (rb) { 3355 struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); 3356 long cmp; 3357 3358 cmp = i915_vma_compare(vma, vm, view); 3359 if (cmp == 0) 3360 return vma; 3361 3362 if (cmp < 0) 3363 rb = rb->rb_right; 3364 else 3365 rb = rb->rb_left; 3366 } 3367 3368 return NULL; 3369 } 3370 3371 struct i915_vma * 3372 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3373 struct i915_address_space *vm, 3374 const struct i915_ggtt_view *view) 3375 { 3376 struct i915_vma *vma; 3377 3378 lockdep_assert_held(&obj->base.dev->struct_mutex); 3379 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3380 3381 vma = i915_gem_obj_to_vma(obj, vm, view); 3382 if (!vma) { 3383 vma = i915_vma_create(obj, vm, view); 3384 GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); 3385 } 3386 3387 GEM_BUG_ON(i915_vma_is_closed(vma)); 3388 return vma; 3389 } 3390 3391 static struct scatterlist * 3392 rotate_pages(const dma_addr_t *in, unsigned int offset, 3393 unsigned int width, unsigned int height, 3394 unsigned int stride, 3395 struct sg_table *st, struct scatterlist *sg) 3396 { 3397 unsigned int column, row; 3398 unsigned int src_idx; 3399 3400 for (column = 0; column < width; column++) { 3401 src_idx = stride * (height - 1) + column; 3402 for (row = 0; row < height; row++) { 3403 st->nents++; 3404 /* We don't need the pages, but need to initialize 3405 * the entries so the sg list can be happily traversed. 3406 * The only thing we need are DMA addresses. 
3407 */ 3408 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3409 sg_dma_address(sg) = in[offset + src_idx]; 3410 sg_dma_len(sg) = PAGE_SIZE; 3411 sg = sg_next(sg); 3412 src_idx -= stride; 3413 } 3414 } 3415 3416 return sg; 3417 } 3418 3419 static struct sg_table * 3420 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3421 struct drm_i915_gem_object *obj) 3422 { 3423 const size_t n_pages = obj->base.size / PAGE_SIZE; 3424 unsigned int size = intel_rotation_info_size(rot_info); 3425 struct sgt_iter sgt_iter; 3426 dma_addr_t dma_addr; 3427 unsigned long i; 3428 dma_addr_t *page_addr_list; 3429 struct sg_table *st; 3430 struct scatterlist *sg; 3431 int ret = -ENOMEM; 3432 3433 /* Allocate a temporary list of source pages for random access. */ 3434 page_addr_list = drm_malloc_gfp(n_pages, 3435 sizeof(dma_addr_t), 3436 GFP_TEMPORARY); 3437 if (!page_addr_list) 3438 return ERR_PTR(ret); 3439 3440 /* Allocate target SG list. */ 3441 st = kmalloc(sizeof(*st), GFP_KERNEL); 3442 if (!st) 3443 goto err_st_alloc; 3444 3445 ret = sg_alloc_table(st, size, GFP_KERNEL); 3446 if (ret) 3447 goto err_sg_alloc; 3448 3449 /* Populate source page list from the object. */ 3450 i = 0; 3451 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3452 page_addr_list[i++] = dma_addr; 3453 3454 GEM_BUG_ON(i != n_pages); 3455 st->nents = 0; 3456 sg = st->sgl; 3457 3458 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3459 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3460 rot_info->plane[i].width, rot_info->plane[i].height, 3461 rot_info->plane[i].stride, st, sg); 3462 } 3463 3464 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3465 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3466 3467 drm_free_large(page_addr_list); 3468 3469 return st; 3470 3471 err_sg_alloc: 3472 kfree(st); 3473 err_st_alloc: 3474 drm_free_large(page_addr_list); 3475 3476 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! 
(%ux%u tiles, %u pages)\n", 3477 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3478 3479 return ERR_PTR(ret); 3480 } 3481 3482 static struct sg_table * 3483 intel_partial_pages(const struct i915_ggtt_view *view, 3484 struct drm_i915_gem_object *obj) 3485 { 3486 struct sg_table *st; 3487 struct scatterlist *sg, *iter; 3488 unsigned int count = view->params.partial.size; 3489 unsigned int offset; 3490 int ret = -ENOMEM; 3491 3492 st = kmalloc(sizeof(*st), GFP_KERNEL); 3493 if (!st) 3494 goto err_st_alloc; 3495 3496 ret = sg_alloc_table(st, count, GFP_KERNEL); 3497 if (ret) 3498 goto err_sg_alloc; 3499 3500 iter = i915_gem_object_get_sg(obj, 3501 view->params.partial.offset, 3502 &offset); 3503 GEM_BUG_ON(!iter); 3504 3505 sg = st->sgl; 3506 st->nents = 0; 3507 do { 3508 unsigned int len; 3509 3510 len = min(iter->length - (offset << PAGE_SHIFT), 3511 count << PAGE_SHIFT); 3512 sg_set_page(sg, NULL, len, 0); 3513 sg_dma_address(sg) = 3514 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3515 sg_dma_len(sg) = len; 3516 3517 st->nents++; 3518 count -= len >> PAGE_SHIFT; 3519 if (count == 0) { 3520 sg_mark_end(sg); 3521 return st; 3522 } 3523 3524 sg = __sg_next(sg); 3525 iter = __sg_next(iter); 3526 offset = 0; 3527 } while (1); 3528 3529 err_sg_alloc: 3530 kfree(st); 3531 err_st_alloc: 3532 return ERR_PTR(ret); 3533 } 3534 3535 static int 3536 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3537 { 3538 int ret = 0; 3539 3540 /* The vma->pages are only valid within the lifespan of the borrowed 3541 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so 3542 * must be the vma->pages. A simple rule is that vma->pages must only 3543 * be accessed when the obj->mm.pages are pinned. 3544 */ 3545 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); 3546 3547 if (vma->pages) 3548 return 0; 3549 3550 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3551 vma->pages = vma->obj->mm.pages; 3552 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3553 vma->pages = 3554 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3555 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3556 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3557 else 3558 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3559 vma->ggtt_view.type); 3560 3561 if (!vma->pages) { 3562 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3563 vma->ggtt_view.type); 3564 ret = -EINVAL; 3565 } else if (IS_ERR(vma->pages)) { 3566 ret = PTR_ERR(vma->pages); 3567 vma->pages = NULL; 3568 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3569 vma->ggtt_view.type, ret); 3570 } 3571 3572 return ret; 3573 } 3574 3575
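/*
 * Example (an illustrative sketch, not code used by the driver): requesting
 * a partial GGTT view covering only the first page of an object via the
 * lookup-or-create helper above. params.partial.offset is the page index
 * into the object and params.partial.size is the view length in pages,
 * matching their use in intel_partial_pages(). The helper name below is
 * hypothetical, error handling is elided, and the caller is assumed to hold
 * struct_mutex, as asserted by i915_gem_obj_lookup_or_create_vma().
 *
 *	static struct i915_vma *
 *	example_first_page_vma(struct drm_i915_gem_object *obj,
 *			       struct i915_address_space *ggtt_vm)
 *	{
 *		struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_PARTIAL };
 *
 *		view.params.partial.offset = 0;
 *		view.params.partial.size = 1;
 *
 *		return i915_gem_obj_lookup_or_create_vma(obj, ggtt_vm, &view);
 *	}
 */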