/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view is called the normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * The above represents the normal GGTT view, as normally mapped for GPU or
 * CPU rendering. In contrast, the view fed to the display engine would be an
 * alternative one which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view. (A small illustrative sketch of this
 * reordering follows this comment block.)
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate
 * on, or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the
 * vma.ggtt_view and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that the
 * passed-in struct i915_ggtt_view does not need to be persistent (left
 * around after calling the core API functions).
 *
 */
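/*
 * Purely illustrative sketch of the "1212 / 3434" reordering described in
 * the DOC comment above; it is not driver code (real views are built as
 * scatter-gather tables in i915_get_ggtt_vma_pages()). Page numbers follow
 * the 2x2 example framebuffer.
 *
 *	static const unsigned int fb_page[2][2] = { { 1, 2 }, { 3, 4 } };
 *	unsigned int view[8], n = 0;
 *	int row, rep, col;
 *
 *	for (row = 0; row < 2; row++)
 *		for (rep = 0; rep < 2; rep++)
 *			for (col = 0; col < 2; col++)
 *				view[n++] = fb_page[row][col];
 *
 *	view[] now holds 1, 2, 1, 2, 3, 4, 3, 4, i.e. each framebuffer row is
 *	fed to the display twice, giving the alternative layout shown above.
 */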
static inline struct i915_ggtt *
i915_vm_to_ggtt(struct i915_address_space *vm)
{
	GEM_BUG_ON(!i915_is_ggtt(vm));
	return container_of(vm, struct i915_ggtt, base);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal = {
	.type = I915_GGTT_VIEW_NORMAL,
};
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED,
};

int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
	has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
	has_full_48bit_ppgtt =
		IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;

	if (intel_vgpu_active(dev_priv)) {
		/* emulation is too hard */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = false;
	}

	if (!has_aliasing_ppgtt)
		return 0;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ?
1 : 0; 171 } 172 173 static int ppgtt_bind_vma(struct i915_vma *vma, 174 enum i915_cache_level cache_level, 175 u32 unused) 176 { 177 u32 pte_flags = 0; 178 179 vma->pages = vma->obj->mm.pages; 180 181 /* Currently applicable only to VLV */ 182 if (vma->obj->gt_ro) 183 pte_flags |= PTE_READ_ONLY; 184 185 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 186 cache_level, pte_flags); 187 188 return 0; 189 } 190 191 static void ppgtt_unbind_vma(struct i915_vma *vma) 192 { 193 vma->vm->clear_range(vma->vm, 194 vma->node.start, 195 vma->size); 196 } 197 198 static gen8_pte_t gen8_pte_encode(dma_addr_t addr, 199 enum i915_cache_level level) 200 { 201 gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; 202 pte |= addr; 203 204 switch (level) { 205 case I915_CACHE_NONE: 206 pte |= PPAT_UNCACHED_INDEX; 207 break; 208 case I915_CACHE_WT: 209 pte |= PPAT_DISPLAY_ELLC_INDEX; 210 break; 211 default: 212 pte |= PPAT_CACHED_INDEX; 213 break; 214 } 215 216 return pte; 217 } 218 219 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 220 const enum i915_cache_level level) 221 { 222 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 223 pde |= addr; 224 if (level != I915_CACHE_NONE) 225 pde |= PPAT_CACHED_PDE_INDEX; 226 else 227 pde |= PPAT_UNCACHED_INDEX; 228 return pde; 229 } 230 231 #define gen8_pdpe_encode gen8_pde_encode 232 #define gen8_pml4e_encode gen8_pde_encode 233 234 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 235 enum i915_cache_level level, 236 u32 unused) 237 { 238 gen6_pte_t pte = GEN6_PTE_VALID; 239 pte |= GEN6_PTE_ADDR_ENCODE(addr); 240 241 switch (level) { 242 case I915_CACHE_L3_LLC: 243 case I915_CACHE_LLC: 244 pte |= GEN6_PTE_CACHE_LLC; 245 break; 246 case I915_CACHE_NONE: 247 pte |= GEN6_PTE_UNCACHED; 248 break; 249 default: 250 MISSING_CASE(level); 251 } 252 253 return pte; 254 } 255 256 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 257 enum i915_cache_level level, 258 u32 unused) 259 { 260 gen6_pte_t pte = GEN6_PTE_VALID; 261 pte |= GEN6_PTE_ADDR_ENCODE(addr); 262 263 switch (level) { 264 case I915_CACHE_L3_LLC: 265 pte |= GEN7_PTE_CACHE_L3_LLC; 266 break; 267 case I915_CACHE_LLC: 268 pte |= GEN6_PTE_CACHE_LLC; 269 break; 270 case I915_CACHE_NONE: 271 pte |= GEN6_PTE_UNCACHED; 272 break; 273 default: 274 MISSING_CASE(level); 275 } 276 277 return pte; 278 } 279 280 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 281 enum i915_cache_level level, 282 u32 flags) 283 { 284 gen6_pte_t pte = GEN6_PTE_VALID; 285 pte |= GEN6_PTE_ADDR_ENCODE(addr); 286 287 if (!(flags & PTE_READ_ONLY)) 288 pte |= BYT_PTE_WRITEABLE; 289 290 if (level != I915_CACHE_NONE) 291 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 292 293 return pte; 294 } 295 296 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 297 enum i915_cache_level level, 298 u32 unused) 299 { 300 gen6_pte_t pte = GEN6_PTE_VALID; 301 pte |= HSW_PTE_ADDR_ENCODE(addr); 302 303 if (level != I915_CACHE_NONE) 304 pte |= HSW_WB_LLC_AGE3; 305 306 return pte; 307 } 308 309 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 310 enum i915_cache_level level, 311 u32 unused) 312 { 313 gen6_pte_t pte = GEN6_PTE_VALID; 314 pte |= HSW_PTE_ADDR_ENCODE(addr); 315 316 switch (level) { 317 case I915_CACHE_NONE: 318 break; 319 case I915_CACHE_WT: 320 pte |= HSW_WT_ELLC_LLC_AGE3; 321 break; 322 default: 323 pte |= HSW_WB_ELLC_LLC_AGE3; 324 break; 325 } 326 327 return pte; 328 } 329 330 static int __setup_page_dma(struct drm_device *dev, 331 struct i915_page_dma *p, gfp_t flags) 332 { 333 struct device *kdev = &dev->pdev->dev; 334 335 p->page = alloc_page(flags); 336 
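	/* The dma_map_page() below yields p->daddr, the bus address that is
	 * later written into PDEs/PTEs via px_dma(); if the mapping fails the
	 * freshly allocated page is freed again by the error path that
	 * follows.
	 */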
if (!p->page) 337 return -ENOMEM; 338 339 p->daddr = dma_map_page(kdev, 340 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 341 342 if (dma_mapping_error(kdev, p->daddr)) { 343 __free_page(p->page); 344 return -EINVAL; 345 } 346 347 return 0; 348 } 349 350 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 351 { 352 return __setup_page_dma(dev, p, I915_GFP_DMA); 353 } 354 355 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 356 { 357 struct pci_dev *pdev = dev->pdev; 358 359 if (WARN_ON(!p->page)) 360 return; 361 362 dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 363 __free_page(p->page); 364 memset(p, 0, sizeof(*p)); 365 } 366 367 static void *kmap_page_dma(struct i915_page_dma *p) 368 { 369 return kmap_atomic(p->page); 370 } 371 372 /* We use the flushing unmap only with ppgtt structures: 373 * page directories, page tables and scratch pages. 374 */ 375 static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) 376 { 377 /* There are only few exceptions for gen >=6. chv and bxt. 378 * And we are not sure about the latter so play safe for now. 379 */ 380 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 381 drm_clflush_virt_range(vaddr, PAGE_SIZE); 382 383 kunmap_atomic(vaddr); 384 } 385 386 #define kmap_px(px) kmap_page_dma(px_base(px)) 387 #define kunmap_px(ppgtt, vaddr) \ 388 kunmap_page_dma(to_i915((ppgtt)->base.dev), (vaddr)) 389 390 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 391 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 392 #define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) 393 #define fill32_px(dev_priv, px, v) \ 394 fill_page_dma_32((dev_priv), px_base(px), (v)) 395 396 static void fill_page_dma(struct drm_i915_private *dev_priv, 397 struct i915_page_dma *p, const uint64_t val) 398 { 399 int i; 400 uint64_t * const vaddr = kmap_page_dma(p); 401 402 for (i = 0; i < 512; i++) 403 vaddr[i] = val; 404 405 kunmap_page_dma(dev_priv, vaddr); 406 } 407 408 static void fill_page_dma_32(struct drm_i915_private *dev_priv, 409 struct i915_page_dma *p, const uint32_t val32) 410 { 411 uint64_t v = val32; 412 413 v = v << 32 | val32; 414 415 fill_page_dma(dev_priv, p, v); 416 } 417 418 static int 419 setup_scratch_page(struct drm_device *dev, 420 struct i915_page_dma *scratch, 421 gfp_t gfp) 422 { 423 return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO); 424 } 425 426 static void cleanup_scratch_page(struct drm_device *dev, 427 struct i915_page_dma *scratch) 428 { 429 cleanup_page_dma(dev, scratch); 430 } 431 432 static struct i915_page_table *alloc_pt(struct drm_device *dev) 433 { 434 struct i915_page_table *pt; 435 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
436 GEN8_PTES : GEN6_PTES; 437 int ret = -ENOMEM; 438 439 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 440 if (!pt) 441 return ERR_PTR(-ENOMEM); 442 443 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 444 GFP_KERNEL); 445 446 if (!pt->used_ptes) 447 goto fail_bitmap; 448 449 ret = setup_px(dev, pt); 450 if (ret) 451 goto fail_page_m; 452 453 return pt; 454 455 fail_page_m: 456 kfree(pt->used_ptes); 457 fail_bitmap: 458 kfree(pt); 459 460 return ERR_PTR(ret); 461 } 462 463 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 464 { 465 cleanup_px(dev, pt); 466 kfree(pt->used_ptes); 467 kfree(pt); 468 } 469 470 static void gen8_initialize_pt(struct i915_address_space *vm, 471 struct i915_page_table *pt) 472 { 473 gen8_pte_t scratch_pte; 474 475 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 476 I915_CACHE_LLC); 477 478 fill_px(to_i915(vm->dev), pt, scratch_pte); 479 } 480 481 static void gen6_initialize_pt(struct i915_address_space *vm, 482 struct i915_page_table *pt) 483 { 484 gen6_pte_t scratch_pte; 485 486 WARN_ON(vm->scratch_page.daddr == 0); 487 488 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 489 I915_CACHE_LLC, 0); 490 491 fill32_px(to_i915(vm->dev), pt, scratch_pte); 492 } 493 494 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 495 { 496 struct i915_page_directory *pd; 497 int ret = -ENOMEM; 498 499 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 500 if (!pd) 501 return ERR_PTR(-ENOMEM); 502 503 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 504 sizeof(*pd->used_pdes), GFP_KERNEL); 505 if (!pd->used_pdes) 506 goto fail_bitmap; 507 508 ret = setup_px(dev, pd); 509 if (ret) 510 goto fail_page_m; 511 512 return pd; 513 514 fail_page_m: 515 kfree(pd->used_pdes); 516 fail_bitmap: 517 kfree(pd); 518 519 return ERR_PTR(ret); 520 } 521 522 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 523 { 524 if (px_page(pd)) { 525 cleanup_px(dev, pd); 526 kfree(pd->used_pdes); 527 kfree(pd); 528 } 529 } 530 531 static void gen8_initialize_pd(struct i915_address_space *vm, 532 struct i915_page_directory *pd) 533 { 534 gen8_pde_t scratch_pde; 535 536 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 537 538 fill_px(to_i915(vm->dev), pd, scratch_pde); 539 } 540 541 static int __pdp_init(struct drm_device *dev, 542 struct i915_page_directory_pointer *pdp) 543 { 544 size_t pdpes = I915_PDPES_PER_PDP(dev); 545 546 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 547 sizeof(unsigned long), 548 GFP_KERNEL); 549 if (!pdp->used_pdpes) 550 return -ENOMEM; 551 552 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 553 GFP_KERNEL); 554 if (!pdp->page_directory) { 555 kfree(pdp->used_pdpes); 556 /* the PDP might be the statically allocated top level. 
Keep it 557 * as clean as possible */ 558 pdp->used_pdpes = NULL; 559 return -ENOMEM; 560 } 561 562 return 0; 563 } 564 565 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 566 { 567 kfree(pdp->used_pdpes); 568 kfree(pdp->page_directory); 569 pdp->page_directory = NULL; 570 } 571 572 static struct 573 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 574 { 575 struct i915_page_directory_pointer *pdp; 576 int ret = -ENOMEM; 577 578 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 579 580 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 581 if (!pdp) 582 return ERR_PTR(-ENOMEM); 583 584 ret = __pdp_init(dev, pdp); 585 if (ret) 586 goto fail_bitmap; 587 588 ret = setup_px(dev, pdp); 589 if (ret) 590 goto fail_page_m; 591 592 return pdp; 593 594 fail_page_m: 595 __pdp_fini(pdp); 596 fail_bitmap: 597 kfree(pdp); 598 599 return ERR_PTR(ret); 600 } 601 602 static void free_pdp(struct drm_device *dev, 603 struct i915_page_directory_pointer *pdp) 604 { 605 __pdp_fini(pdp); 606 if (USES_FULL_48BIT_PPGTT(dev)) { 607 cleanup_px(dev, pdp); 608 kfree(pdp); 609 } 610 } 611 612 static void gen8_initialize_pdp(struct i915_address_space *vm, 613 struct i915_page_directory_pointer *pdp) 614 { 615 gen8_ppgtt_pdpe_t scratch_pdpe; 616 617 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 618 619 fill_px(to_i915(vm->dev), pdp, scratch_pdpe); 620 } 621 622 static void gen8_initialize_pml4(struct i915_address_space *vm, 623 struct i915_pml4 *pml4) 624 { 625 gen8_ppgtt_pml4e_t scratch_pml4e; 626 627 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 628 I915_CACHE_LLC); 629 630 fill_px(to_i915(vm->dev), pml4, scratch_pml4e); 631 } 632 633 static void 634 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 635 struct i915_page_directory_pointer *pdp, 636 struct i915_page_directory *pd, 637 int index) 638 { 639 gen8_ppgtt_pdpe_t *page_directorypo; 640 641 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 642 return; 643 644 page_directorypo = kmap_px(pdp); 645 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 646 kunmap_px(ppgtt, page_directorypo); 647 } 648 649 static void 650 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 651 struct i915_pml4 *pml4, 652 struct i915_page_directory_pointer *pdp, 653 int index) 654 { 655 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 656 657 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 658 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 659 kunmap_px(ppgtt, pagemap); 660 } 661 662 /* Broadwell Page Directory Pointer Descriptors */ 663 static int gen8_write_pdp(struct drm_i915_gem_request *req, 664 unsigned entry, 665 dma_addr_t addr) 666 { 667 struct intel_ring *ring = req->ring; 668 struct intel_engine_cs *engine = req->engine; 669 int ret; 670 671 BUG_ON(entry >= 4); 672 673 ret = intel_ring_begin(req, 6); 674 if (ret) 675 return ret; 676 677 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 678 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 679 intel_ring_emit(ring, upper_32_bits(addr)); 680 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 681 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 682 intel_ring_emit(ring, lower_32_bits(addr)); 683 intel_ring_advance(ring); 684 685 return 0; 686 } 687 688 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 689 struct drm_i915_gem_request *req) 690 { 691 int i, ret; 692 693 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 694 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 695 696 ret = 
gen8_write_pdp(req, i, pd_daddr); 697 if (ret) 698 return ret; 699 } 700 701 return 0; 702 } 703 704 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 705 struct drm_i915_gem_request *req) 706 { 707 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 708 } 709 710 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 711 * the page table structures, we mark them dirty so that 712 * context switching/execlist queuing code takes extra steps 713 * to ensure that tlbs are flushed. 714 */ 715 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 716 { 717 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 718 } 719 720 /* Removes entries from a single page table, releasing it if it's empty. 721 * Caller can use the return value to update higher-level entries. 722 */ 723 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, 724 struct i915_page_table *pt, 725 uint64_t start, 726 uint64_t length) 727 { 728 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 729 unsigned int num_entries = gen8_pte_count(start, length); 730 unsigned int pte = gen8_pte_index(start); 731 unsigned int pte_end = pte + num_entries; 732 gen8_pte_t *pt_vaddr; 733 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 734 I915_CACHE_LLC); 735 736 if (WARN_ON(!px_page(pt))) 737 return false; 738 739 GEM_BUG_ON(pte_end > GEN8_PTES); 740 741 bitmap_clear(pt->used_ptes, pte, num_entries); 742 743 if (bitmap_empty(pt->used_ptes, GEN8_PTES)) { 744 free_pt(vm->dev, pt); 745 return true; 746 } 747 748 pt_vaddr = kmap_px(pt); 749 750 while (pte < pte_end) 751 pt_vaddr[pte++] = scratch_pte; 752 753 kunmap_px(ppgtt, pt_vaddr); 754 755 return false; 756 } 757 758 /* Removes entries from a single page dir, releasing it if it's empty. 759 * Caller can use the return value to update higher-level entries 760 */ 761 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, 762 struct i915_page_directory *pd, 763 uint64_t start, 764 uint64_t length) 765 { 766 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 767 struct i915_page_table *pt; 768 uint64_t pde; 769 gen8_pde_t *pde_vaddr; 770 gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), 771 I915_CACHE_LLC); 772 773 gen8_for_each_pde(pt, pd, start, length, pde) { 774 if (WARN_ON(!pd->page_table[pde])) 775 break; 776 777 if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { 778 __clear_bit(pde, pd->used_pdes); 779 pde_vaddr = kmap_px(pd); 780 pde_vaddr[pde] = scratch_pde; 781 kunmap_px(ppgtt, pde_vaddr); 782 } 783 } 784 785 if (bitmap_empty(pd->used_pdes, I915_PDES)) { 786 free_pd(vm->dev, pd); 787 return true; 788 } 789 790 return false; 791 } 792 793 /* Removes entries from a single page dir pointer, releasing it if it's empty. 
794 * Caller can use the return value to update higher-level entries 795 */ 796 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, 797 struct i915_page_directory_pointer *pdp, 798 uint64_t start, 799 uint64_t length) 800 { 801 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 802 struct i915_page_directory *pd; 803 uint64_t pdpe; 804 gen8_ppgtt_pdpe_t *pdpe_vaddr; 805 gen8_ppgtt_pdpe_t scratch_pdpe = 806 gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 807 808 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 809 if (WARN_ON(!pdp->page_directory[pdpe])) 810 break; 811 812 if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { 813 __clear_bit(pdpe, pdp->used_pdpes); 814 if (USES_FULL_48BIT_PPGTT(vm->dev)) { 815 pdpe_vaddr = kmap_px(pdp); 816 pdpe_vaddr[pdpe] = scratch_pdpe; 817 kunmap_px(ppgtt, pdpe_vaddr); 818 } 819 } 820 } 821 822 mark_tlbs_dirty(ppgtt); 823 824 if (USES_FULL_48BIT_PPGTT(vm->dev) && 825 bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) { 826 free_pdp(vm->dev, pdp); 827 return true; 828 } 829 830 return false; 831 } 832 833 /* Removes entries from a single pml4. 834 * This is the top-level structure in 4-level page tables used on gen8+. 835 * Empty entries are always scratch pml4e. 836 */ 837 static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, 838 struct i915_pml4 *pml4, 839 uint64_t start, 840 uint64_t length) 841 { 842 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 843 struct i915_page_directory_pointer *pdp; 844 uint64_t pml4e; 845 gen8_ppgtt_pml4e_t *pml4e_vaddr; 846 gen8_ppgtt_pml4e_t scratch_pml4e = 847 gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); 848 849 GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->dev)); 850 851 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 852 if (WARN_ON(!pml4->pdps[pml4e])) 853 break; 854 855 if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { 856 __clear_bit(pml4e, pml4->used_pml4es); 857 pml4e_vaddr = kmap_px(pml4); 858 pml4e_vaddr[pml4e] = scratch_pml4e; 859 kunmap_px(ppgtt, pml4e_vaddr); 860 } 861 } 862 } 863 864 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 865 uint64_t start, uint64_t length) 866 { 867 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 868 869 if (USES_FULL_48BIT_PPGTT(vm->dev)) 870 gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); 871 else 872 gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); 873 } 874 875 static void 876 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 877 struct i915_page_directory_pointer *pdp, 878 struct sg_page_iter *sg_iter, 879 uint64_t start, 880 enum i915_cache_level cache_level) 881 { 882 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 883 gen8_pte_t *pt_vaddr; 884 unsigned pdpe = gen8_pdpe_index(start); 885 unsigned pde = gen8_pde_index(start); 886 unsigned pte = gen8_pte_index(start); 887 888 pt_vaddr = NULL; 889 890 while (__sg_page_iter_next(sg_iter)) { 891 if (pt_vaddr == NULL) { 892 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 893 struct i915_page_table *pt = pd->page_table[pde]; 894 pt_vaddr = kmap_px(pt); 895 } 896 897 pt_vaddr[pte] = 898 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 899 cache_level); 900 if (++pte == GEN8_PTES) { 901 kunmap_px(ppgtt, pt_vaddr); 902 pt_vaddr = NULL; 903 if (++pde == I915_PDES) { 904 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 905 break; 906 pde = 0; 907 } 908 pte = 0; 909 } 910 } 911 912 if (pt_vaddr) 913 kunmap_px(ppgtt, pt_vaddr); 914 } 915 916 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 917 struct 
sg_table *pages, 918 uint64_t start, 919 enum i915_cache_level cache_level, 920 u32 unused) 921 { 922 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 923 struct sg_page_iter sg_iter; 924 925 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 926 927 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 928 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 929 cache_level); 930 } else { 931 struct i915_page_directory_pointer *pdp; 932 uint64_t pml4e; 933 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 934 935 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 936 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 937 start, cache_level); 938 } 939 } 940 } 941 942 static void gen8_free_page_tables(struct drm_device *dev, 943 struct i915_page_directory *pd) 944 { 945 int i; 946 947 if (!px_page(pd)) 948 return; 949 950 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 951 if (WARN_ON(!pd->page_table[i])) 952 continue; 953 954 free_pt(dev, pd->page_table[i]); 955 pd->page_table[i] = NULL; 956 } 957 } 958 959 static int gen8_init_scratch(struct i915_address_space *vm) 960 { 961 struct drm_device *dev = vm->dev; 962 int ret; 963 964 ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); 965 if (ret) 966 return ret; 967 968 vm->scratch_pt = alloc_pt(dev); 969 if (IS_ERR(vm->scratch_pt)) { 970 ret = PTR_ERR(vm->scratch_pt); 971 goto free_scratch_page; 972 } 973 974 vm->scratch_pd = alloc_pd(dev); 975 if (IS_ERR(vm->scratch_pd)) { 976 ret = PTR_ERR(vm->scratch_pd); 977 goto free_pt; 978 } 979 980 if (USES_FULL_48BIT_PPGTT(dev)) { 981 vm->scratch_pdp = alloc_pdp(dev); 982 if (IS_ERR(vm->scratch_pdp)) { 983 ret = PTR_ERR(vm->scratch_pdp); 984 goto free_pd; 985 } 986 } 987 988 gen8_initialize_pt(vm, vm->scratch_pt); 989 gen8_initialize_pd(vm, vm->scratch_pd); 990 if (USES_FULL_48BIT_PPGTT(dev)) 991 gen8_initialize_pdp(vm, vm->scratch_pdp); 992 993 return 0; 994 995 free_pd: 996 free_pd(dev, vm->scratch_pd); 997 free_pt: 998 free_pt(dev, vm->scratch_pt); 999 free_scratch_page: 1000 cleanup_scratch_page(dev, &vm->scratch_page); 1001 1002 return ret; 1003 } 1004 1005 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 1006 { 1007 enum vgt_g2v_type msg; 1008 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1009 int i; 1010 1011 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 1012 u64 daddr = px_dma(&ppgtt->pml4); 1013 1014 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 1015 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 1016 1017 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 1018 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 1019 } else { 1020 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 1021 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 1022 1023 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 1024 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 1025 } 1026 1027 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 1028 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 1029 } 1030 1031 I915_WRITE(vgtif_reg(g2v_notify), msg); 1032 1033 return 0; 1034 } 1035 1036 static void gen8_free_scratch(struct i915_address_space *vm) 1037 { 1038 struct drm_device *dev = vm->dev; 1039 1040 if (USES_FULL_48BIT_PPGTT(dev)) 1041 free_pdp(dev, vm->scratch_pdp); 1042 free_pd(dev, vm->scratch_pd); 1043 free_pt(dev, vm->scratch_pt); 1044 cleanup_scratch_page(dev, &vm->scratch_page); 1045 } 1046 1047 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 1048 struct i915_page_directory_pointer *pdp) 1049 { 1050 int i; 1051 1052 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 1053 if (WARN_ON(!pdp->page_directory[i])) 1054 continue; 1055 1056 gen8_free_page_tables(dev, pdp->page_directory[i]); 1057 free_pd(dev, pdp->page_directory[i]); 1058 } 1059 1060 free_pdp(dev, pdp); 1061 } 1062 1063 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 1064 { 1065 int i; 1066 1067 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 1068 if (WARN_ON(!ppgtt->pml4.pdps[i])) 1069 continue; 1070 1071 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 1072 } 1073 1074 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 1075 } 1076 1077 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 1078 { 1079 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1080 1081 if (intel_vgpu_active(to_i915(vm->dev))) 1082 gen8_ppgtt_notify_vgt(ppgtt, false); 1083 1084 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 1085 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 1086 else 1087 gen8_ppgtt_cleanup_4lvl(ppgtt); 1088 1089 gen8_free_scratch(vm); 1090 } 1091 1092 /** 1093 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1094 * @vm: Master vm structure. 1095 * @pd: Page directory for this address range. 1096 * @start: Starting virtual address to begin allocations. 1097 * @length: Size of the allocations. 1098 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1099 * caller to free on error. 1100 * 1101 * Allocate the required number of page tables. Extremely similar to 1102 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1103 * the page directory boundary (instead of the page directory pointer). That 1104 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1105 * possible, and likely that the caller will need to use multiple calls of this 1106 * function to achieve the appropriate allocation. 1107 * 1108 * Return: 0 if success; negative error code otherwise. 
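 *
 * Illustrative sizing, assuming 4 KiB pages: each page table holds 512 PTEs
 * and so maps 2 MiB, so for example a 4 MiB range aligned to a PDE boundary
 * needs two page tables, while an unaligned one may touch three.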
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		/* Don't reallocate page tables */
		if (test_bit(pde, pd->used_pdes)) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == vm->scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(vm, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @vm: Master vm structure.
 * @pdp: Page directory pointer for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pds: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directories starting at the pde index
 * of @start, and ending at the pde index @start + @length. This function will
 * skip over already allocated page directories within the range, and only
 * allocate new ones, setting the appropriate pointer within the pdp as well
 * as the correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
				  struct i915_page_directory_pointer *pdp,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pds)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);

	WARN_ON(!bitmap_empty(new_pds, pdpes));

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (test_bit(pdpe, pdp->used_pdpes))
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(vm, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm: Master vm structure.
 * @pml4: Page map level 4 for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
1215 * 1216 * Allocate the required number of page directory pointers. Extremely similar to 1217 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1218 * The main difference is here we are limited by the pml4 boundary (instead of 1219 * the page directory pointer). 1220 * 1221 * Return: 0 if success; negative error code otherwise. 1222 */ 1223 static int 1224 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1225 struct i915_pml4 *pml4, 1226 uint64_t start, 1227 uint64_t length, 1228 unsigned long *new_pdps) 1229 { 1230 struct drm_device *dev = vm->dev; 1231 struct i915_page_directory_pointer *pdp; 1232 uint32_t pml4e; 1233 1234 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1235 1236 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1237 if (!test_bit(pml4e, pml4->used_pml4es)) { 1238 pdp = alloc_pdp(dev); 1239 if (IS_ERR(pdp)) 1240 goto unwind_out; 1241 1242 gen8_initialize_pdp(vm, pdp); 1243 pml4->pdps[pml4e] = pdp; 1244 __set_bit(pml4e, new_pdps); 1245 trace_i915_page_directory_pointer_entry_alloc(vm, 1246 pml4e, 1247 start, 1248 GEN8_PML4E_SHIFT); 1249 } 1250 } 1251 1252 return 0; 1253 1254 unwind_out: 1255 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1256 free_pdp(dev, pml4->pdps[pml4e]); 1257 1258 return -ENOMEM; 1259 } 1260 1261 static void 1262 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1263 { 1264 kfree(new_pts); 1265 kfree(new_pds); 1266 } 1267 1268 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1269 * of these are based on the number of PDPEs in the system. 1270 */ 1271 static 1272 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1273 unsigned long **new_pts, 1274 uint32_t pdpes) 1275 { 1276 unsigned long *pds; 1277 unsigned long *pts; 1278 1279 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1280 if (!pds) 1281 return -ENOMEM; 1282 1283 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1284 GFP_TEMPORARY); 1285 if (!pts) 1286 goto err_out; 1287 1288 *new_pds = pds; 1289 *new_pts = pts; 1290 1291 return 0; 1292 1293 err_out: 1294 free_gen8_temp_bitmaps(pds, pts); 1295 return -ENOMEM; 1296 } 1297 1298 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1299 struct i915_page_directory_pointer *pdp, 1300 uint64_t start, 1301 uint64_t length) 1302 { 1303 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1304 unsigned long *new_page_dirs, *new_page_tables; 1305 struct drm_device *dev = vm->dev; 1306 struct i915_page_directory *pd; 1307 const uint64_t orig_start = start; 1308 const uint64_t orig_length = length; 1309 uint32_t pdpe; 1310 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1311 int ret; 1312 1313 /* Wrap is never okay since we can only represent 48b, and we don't 1314 * actually use the other side of the canonical address space. 
1315 */ 1316 if (WARN_ON(start + length < start)) 1317 return -ENODEV; 1318 1319 if (WARN_ON(start + length > vm->total)) 1320 return -ENODEV; 1321 1322 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1323 if (ret) 1324 return ret; 1325 1326 /* Do the allocations first so we can easily bail out */ 1327 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1328 new_page_dirs); 1329 if (ret) { 1330 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1331 return ret; 1332 } 1333 1334 /* For every page directory referenced, allocate page tables */ 1335 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1336 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1337 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1338 if (ret) 1339 goto err_out; 1340 } 1341 1342 start = orig_start; 1343 length = orig_length; 1344 1345 /* Allocations have completed successfully, so set the bitmaps, and do 1346 * the mappings. */ 1347 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1348 gen8_pde_t *const page_directory = kmap_px(pd); 1349 struct i915_page_table *pt; 1350 uint64_t pd_len = length; 1351 uint64_t pd_start = start; 1352 uint32_t pde; 1353 1354 /* Every pd should be allocated, we just did that above. */ 1355 WARN_ON(!pd); 1356 1357 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1358 /* Same reasoning as pd */ 1359 WARN_ON(!pt); 1360 WARN_ON(!pd_len); 1361 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1362 1363 /* Set our used ptes within the page table */ 1364 bitmap_set(pt->used_ptes, 1365 gen8_pte_index(pd_start), 1366 gen8_pte_count(pd_start, pd_len)); 1367 1368 /* Our pde is now pointing to the pagetable, pt */ 1369 __set_bit(pde, pd->used_pdes); 1370 1371 /* Map the PDE to the page table */ 1372 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1373 I915_CACHE_LLC); 1374 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1375 gen8_pte_index(start), 1376 gen8_pte_count(start, length), 1377 GEN8_PTES); 1378 1379 /* NB: We haven't yet mapped ptes to pages. At this 1380 * point we're still relying on insert_entries() */ 1381 } 1382 1383 kunmap_px(ppgtt, page_directory); 1384 __set_bit(pdpe, pdp->used_pdpes); 1385 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1386 } 1387 1388 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1389 mark_tlbs_dirty(ppgtt); 1390 return 0; 1391 1392 err_out: 1393 while (pdpe--) { 1394 unsigned long temp; 1395 1396 for_each_set_bit(temp, new_page_tables + pdpe * 1397 BITS_TO_LONGS(I915_PDES), I915_PDES) 1398 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1399 } 1400 1401 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1402 free_pd(dev, pdp->page_directory[pdpe]); 1403 1404 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1405 mark_tlbs_dirty(ppgtt); 1406 return ret; 1407 } 1408 1409 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1410 struct i915_pml4 *pml4, 1411 uint64_t start, 1412 uint64_t length) 1413 { 1414 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1415 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1416 struct i915_page_directory_pointer *pdp; 1417 uint64_t pml4e; 1418 int ret = 0; 1419 1420 /* Do the pml4 allocations first, so we don't need to track the newly 1421 * allocated tables below the pdp */ 1422 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1423 1424 /* The pagedirectory and pagetable allocations are done in the shared 3 1425 * and 4 level code. Just allocate the pdps. 
1426 */ 1427 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1428 new_pdps); 1429 if (ret) 1430 return ret; 1431 1432 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1433 "The allocation has spanned more than 512GB. " 1434 "It is highly likely this is incorrect."); 1435 1436 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1437 WARN_ON(!pdp); 1438 1439 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1440 if (ret) 1441 goto err_out; 1442 1443 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1444 } 1445 1446 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1447 GEN8_PML4ES_PER_PML4); 1448 1449 return 0; 1450 1451 err_out: 1452 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1453 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1454 1455 return ret; 1456 } 1457 1458 static int gen8_alloc_va_range(struct i915_address_space *vm, 1459 uint64_t start, uint64_t length) 1460 { 1461 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1462 1463 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1464 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1465 else 1466 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1467 } 1468 1469 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1470 uint64_t start, uint64_t length, 1471 gen8_pte_t scratch_pte, 1472 struct seq_file *m) 1473 { 1474 struct i915_page_directory *pd; 1475 uint32_t pdpe; 1476 1477 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1478 struct i915_page_table *pt; 1479 uint64_t pd_len = length; 1480 uint64_t pd_start = start; 1481 uint32_t pde; 1482 1483 if (!test_bit(pdpe, pdp->used_pdpes)) 1484 continue; 1485 1486 seq_printf(m, "\tPDPE #%d\n", pdpe); 1487 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1488 uint32_t pte; 1489 gen8_pte_t *pt_vaddr; 1490 1491 if (!test_bit(pde, pd->used_pdes)) 1492 continue; 1493 1494 pt_vaddr = kmap_px(pt); 1495 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1496 uint64_t va = 1497 (pdpe << GEN8_PDPE_SHIFT) | 1498 (pde << GEN8_PDE_SHIFT) | 1499 (pte << GEN8_PTE_SHIFT); 1500 int i; 1501 bool found = false; 1502 1503 for (i = 0; i < 4; i++) 1504 if (pt_vaddr[pte + i] != scratch_pte) 1505 found = true; 1506 if (!found) 1507 continue; 1508 1509 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1510 for (i = 0; i < 4; i++) { 1511 if (pt_vaddr[pte + i] != scratch_pte) 1512 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1513 else 1514 seq_puts(m, " SCRATCH "); 1515 } 1516 seq_puts(m, "\n"); 1517 } 1518 /* don't use kunmap_px, it could trigger 1519 * an unnecessary flush. 
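			 * (kunmap_px() ends up in kunmap_page_dma(), which
			 * clflushes the whole page on CHV/BXT; that is
			 * pointless for this read-only debug dump, so plain
			 * kunmap_atomic() is used instead.)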
			 */
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	uint64_t start = ppgtt->base.start;
	uint64_t length = ppgtt->base.total;
	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
						 I915_CACHE_LLC);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
	} else {
		uint64_t pml4e;
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
			if (!test_bit(pml4e, pml4->used_pml4es))
				continue;

			seq_printf(m, " PML4E #%llu\n", pml4e);
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
		}
	}
}

static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	unsigned long *new_page_dirs, *new_page_tables;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
	int ret;

	/* We allocate temp bitmap for page tables for no gain
	 * but as this is for init only, let's keep things simple
	 */
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Allocate for all pdps regardless of how the ppgtt
	 * was defined.
	 */
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
						0, 1ULL << 32,
						new_page_dirs);
	if (!ret)
		*ppgtt->pdp.used_pdpes = *new_page_dirs;

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);

	return ret;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB, the legacy 32b
 * address space.
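 *
 * (Worked out, assuming 4 KiB pages: 512 PTEs per page table map 2 MiB,
 * 512 PDEs per page directory map 1 GiB, and the 4 legacy PDP entries
 * therefore cover the 4 GiB above. With the full 4-level layout a PDP
 * instead holds 512 entries, so each PML4 entry spans 512 GiB and 512 of
 * them give the 48 bit, 256 TiB address space set up in gen8_ppgtt_init()
 * below.)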
1583 * 1584 */ 1585 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1586 { 1587 int ret; 1588 1589 ret = gen8_init_scratch(&ppgtt->base); 1590 if (ret) 1591 return ret; 1592 1593 ppgtt->base.start = 0; 1594 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1595 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1596 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1597 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1598 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1599 ppgtt->base.bind_vma = ppgtt_bind_vma; 1600 ppgtt->debug_dump = gen8_dump_ppgtt; 1601 1602 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1603 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1604 if (ret) 1605 goto free_scratch; 1606 1607 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1608 1609 ppgtt->base.total = 1ULL << 48; 1610 ppgtt->switch_mm = gen8_48b_mm_switch; 1611 } else { 1612 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1613 if (ret) 1614 goto free_scratch; 1615 1616 ppgtt->base.total = 1ULL << 32; 1617 ppgtt->switch_mm = gen8_legacy_mm_switch; 1618 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1619 0, 0, 1620 GEN8_PML4E_SHIFT); 1621 1622 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) { 1623 ret = gen8_preallocate_top_level_pdps(ppgtt); 1624 if (ret) 1625 goto free_scratch; 1626 } 1627 } 1628 1629 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) 1630 gen8_ppgtt_notify_vgt(ppgtt, true); 1631 1632 return 0; 1633 1634 free_scratch: 1635 gen8_free_scratch(&ppgtt->base); 1636 return ret; 1637 } 1638 1639 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1640 { 1641 struct i915_address_space *vm = &ppgtt->base; 1642 struct i915_page_table *unused; 1643 gen6_pte_t scratch_pte; 1644 uint32_t pd_entry; 1645 uint32_t pte, pde; 1646 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1647 1648 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1649 I915_CACHE_LLC, 0); 1650 1651 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1652 u32 expected; 1653 gen6_pte_t *pt_vaddr; 1654 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1655 pd_entry = readl(ppgtt->pd_addr + pde); 1656 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1657 1658 if (pd_entry != expected) 1659 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1660 pde, 1661 pd_entry, 1662 expected); 1663 seq_printf(m, "\tPDE: %x\n", pd_entry); 1664 1665 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1666 1667 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1668 unsigned long va = 1669 (pde * PAGE_SIZE * GEN6_PTES) + 1670 (pte * PAGE_SIZE); 1671 int i; 1672 bool found = false; 1673 for (i = 0; i < 4; i++) 1674 if (pt_vaddr[pte + i] != scratch_pte) 1675 found = true; 1676 if (!found) 1677 continue; 1678 1679 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1680 for (i = 0; i < 4; i++) { 1681 if (pt_vaddr[pte + i] != scratch_pte) 1682 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1683 else 1684 seq_puts(m, " SCRATCH "); 1685 } 1686 seq_puts(m, "\n"); 1687 } 1688 kunmap_px(ppgtt, pt_vaddr); 1689 } 1690 } 1691 1692 /* Write pde (index) from the page directory @pd to the page table @pt */ 1693 static void gen6_write_pde(struct i915_page_directory *pd, 1694 const int pde, struct i915_page_table *pt) 1695 { 1696 /* Caller needs to make sure the write completes if necessary */ 1697 struct i915_hw_ppgtt *ppgtt = 1698 container_of(pd, struct i915_hw_ppgtt, pd); 1699 u32 pd_entry; 1700 1701 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1702 pd_entry |= 
GEN6_PDE_VALID; 1703 1704 writel(pd_entry, ppgtt->pd_addr + pde); 1705 } 1706 1707 /* Write all the page tables found in the ppgtt structure to incrementing page 1708 * directories. */ 1709 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1710 struct i915_page_directory *pd, 1711 uint32_t start, uint32_t length) 1712 { 1713 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1714 struct i915_page_table *pt; 1715 uint32_t pde; 1716 1717 gen6_for_each_pde(pt, pd, start, length, pde) 1718 gen6_write_pde(pd, pde, pt); 1719 1720 /* Make sure write is complete before other code can use this page 1721 * table. Also require for WC mapped PTEs */ 1722 readl(ggtt->gsm); 1723 } 1724 1725 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1726 { 1727 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1728 1729 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1730 } 1731 1732 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1733 struct drm_i915_gem_request *req) 1734 { 1735 struct intel_ring *ring = req->ring; 1736 struct intel_engine_cs *engine = req->engine; 1737 int ret; 1738 1739 /* NB: TLBs must be flushed and invalidated before a switch */ 1740 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1741 if (ret) 1742 return ret; 1743 1744 ret = intel_ring_begin(req, 6); 1745 if (ret) 1746 return ret; 1747 1748 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1749 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1750 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1751 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1752 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1753 intel_ring_emit(ring, MI_NOOP); 1754 intel_ring_advance(ring); 1755 1756 return 0; 1757 } 1758 1759 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1760 struct drm_i915_gem_request *req) 1761 { 1762 struct intel_ring *ring = req->ring; 1763 struct intel_engine_cs *engine = req->engine; 1764 int ret; 1765 1766 /* NB: TLBs must be flushed and invalidated before a switch */ 1767 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1768 if (ret) 1769 return ret; 1770 1771 ret = intel_ring_begin(req, 6); 1772 if (ret) 1773 return ret; 1774 1775 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1776 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1777 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1778 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1779 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1780 intel_ring_emit(ring, MI_NOOP); 1781 intel_ring_advance(ring); 1782 1783 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1784 if (engine->id != RCS) { 1785 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1786 if (ret) 1787 return ret; 1788 } 1789 1790 return 0; 1791 } 1792 1793 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1794 struct drm_i915_gem_request *req) 1795 { 1796 struct intel_engine_cs *engine = req->engine; 1797 struct drm_i915_private *dev_priv = req->i915; 1798 1799 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1800 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1801 return 0; 1802 } 1803 1804 static void gen8_ppgtt_enable(struct drm_device *dev) 1805 { 1806 struct drm_i915_private *dev_priv = to_i915(dev); 1807 struct intel_engine_cs *engine; 1808 enum intel_engine_id id; 1809 1810 for_each_engine(engine, dev_priv, id) { 1811 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0; 1812 I915_WRITE(RING_MODE_GEN7(engine), 1813 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1814 } 1815 } 1816 1817 static void gen7_ppgtt_enable(struct drm_device *dev) 1818 { 1819 struct drm_i915_private *dev_priv = to_i915(dev); 1820 struct intel_engine_cs *engine; 1821 uint32_t ecochk, ecobits; 1822 enum intel_engine_id id; 1823 1824 ecobits = I915_READ(GAC_ECO_BITS); 1825 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1826 1827 ecochk = I915_READ(GAM_ECOCHK); 1828 if (IS_HASWELL(dev_priv)) { 1829 ecochk |= ECOCHK_PPGTT_WB_HSW; 1830 } else { 1831 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1832 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1833 } 1834 I915_WRITE(GAM_ECOCHK, ecochk); 1835 1836 for_each_engine(engine, dev_priv, id) { 1837 /* GFX_MODE is per-ring on gen7+ */ 1838 I915_WRITE(RING_MODE_GEN7(engine), 1839 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1840 } 1841 } 1842 1843 static void gen6_ppgtt_enable(struct drm_device *dev) 1844 { 1845 struct drm_i915_private *dev_priv = to_i915(dev); 1846 uint32_t ecochk, gab_ctl, ecobits; 1847 1848 ecobits = I915_READ(GAC_ECO_BITS); 1849 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1850 ECOBITS_PPGTT_CACHE64B); 1851 1852 gab_ctl = I915_READ(GAB_CTL); 1853 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1854 1855 ecochk = I915_READ(GAM_ECOCHK); 1856 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1857 1858 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1859 } 1860 1861 /* PPGTT support for Sandybdrige/Gen6 and later */ 1862 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1863 uint64_t start, 1864 uint64_t length) 1865 { 1866 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1867 gen6_pte_t *pt_vaddr, scratch_pte; 1868 unsigned first_entry = start >> PAGE_SHIFT; 1869 unsigned num_entries = length >> PAGE_SHIFT; 1870 unsigned act_pt = first_entry / GEN6_PTES; 1871 unsigned first_pte = first_entry % GEN6_PTES; 1872 unsigned last_pte, i; 1873 1874 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1875 I915_CACHE_LLC, 0); 1876 1877 while (num_entries) { 1878 last_pte = first_pte + num_entries; 1879 if (last_pte > GEN6_PTES) 1880 last_pte = GEN6_PTES; 1881 1882 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1883 1884 for (i = first_pte; i < last_pte; i++) 1885 pt_vaddr[i] = scratch_pte; 1886 1887 kunmap_px(ppgtt, pt_vaddr); 1888 1889 num_entries -= last_pte - first_pte; 1890 first_pte = 0; 1891 act_pt++; 1892 } 1893 } 1894 1895 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1896 struct sg_table *pages, 1897 uint64_t start, 1898 enum i915_cache_level cache_level, u32 flags) 1899 { 1900 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1901 unsigned first_entry = start >> PAGE_SHIFT; 1902 unsigned act_pt = first_entry / GEN6_PTES; 1903 unsigned act_pte = first_entry % GEN6_PTES; 1904 gen6_pte_t *pt_vaddr = NULL; 1905 struct sgt_iter sgt_iter; 1906 dma_addr_t addr; 1907 1908 for_each_sgt_dma(addr, sgt_iter, pages) { 1909 if (pt_vaddr == NULL) 1910 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1911 1912 pt_vaddr[act_pte] = 1913 vm->pte_encode(addr, cache_level, flags); 1914 1915 if (++act_pte == GEN6_PTES) { 1916 kunmap_px(ppgtt, pt_vaddr); 1917 pt_vaddr = NULL; 1918 act_pt++; 1919 act_pte = 0; 1920 } 1921 } 1922 1923 if (pt_vaddr) 1924 kunmap_px(ppgtt, pt_vaddr); 1925 } 1926 1927 static int gen6_alloc_va_range(struct i915_address_space *vm, 1928 uint64_t start_in, uint64_t length_in) 1929 { 1930 
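	/* Geometry reminder, assuming 4 KiB pages and 4-byte gen6 PTEs: each
	 * page table holds GEN6_PTES (1024) entries and therefore maps 4 MiB,
	 * and the I915_PDES (512) directory entries give the 2 GiB total
	 * advertised by gen6_ppgtt_init(); start/length are carved into
	 * pde/pte indices on those boundaries below.
	 */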
DECLARE_BITMAP(new_page_tables, I915_PDES); 1931 struct drm_device *dev = vm->dev; 1932 struct drm_i915_private *dev_priv = to_i915(dev); 1933 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1934 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1935 struct i915_page_table *pt; 1936 uint32_t start, length, start_save, length_save; 1937 uint32_t pde; 1938 int ret; 1939 1940 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1941 return -ENODEV; 1942 1943 start = start_save = start_in; 1944 length = length_save = length_in; 1945 1946 bitmap_zero(new_page_tables, I915_PDES); 1947 1948 /* The allocation is done in two stages so that we can bail out with 1949 * minimal amount of pain. The first stage finds new page tables that 1950 * need allocation. The second stage marks use ptes within the page 1951 * tables. 1952 */ 1953 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1954 if (pt != vm->scratch_pt) { 1955 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1956 continue; 1957 } 1958 1959 /* We've already allocated a page table */ 1960 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1961 1962 pt = alloc_pt(dev); 1963 if (IS_ERR(pt)) { 1964 ret = PTR_ERR(pt); 1965 goto unwind_out; 1966 } 1967 1968 gen6_initialize_pt(vm, pt); 1969 1970 ppgtt->pd.page_table[pde] = pt; 1971 __set_bit(pde, new_page_tables); 1972 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1973 } 1974 1975 start = start_save; 1976 length = length_save; 1977 1978 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1979 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1980 1981 bitmap_zero(tmp_bitmap, GEN6_PTES); 1982 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1983 gen6_pte_count(start, length)); 1984 1985 if (__test_and_clear_bit(pde, new_page_tables)) 1986 gen6_write_pde(&ppgtt->pd, pde, pt); 1987 1988 trace_i915_page_table_entry_map(vm, pde, pt, 1989 gen6_pte_index(start), 1990 gen6_pte_count(start, length), 1991 GEN6_PTES); 1992 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1993 GEN6_PTES); 1994 } 1995 1996 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1997 1998 /* Make sure write is complete before other code can use this page 1999 * table. 
Also require for WC mapped PTEs */ 2000 readl(ggtt->gsm); 2001 2002 mark_tlbs_dirty(ppgtt); 2003 return 0; 2004 2005 unwind_out: 2006 for_each_set_bit(pde, new_page_tables, I915_PDES) { 2007 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 2008 2009 ppgtt->pd.page_table[pde] = vm->scratch_pt; 2010 free_pt(vm->dev, pt); 2011 } 2012 2013 mark_tlbs_dirty(ppgtt); 2014 return ret; 2015 } 2016 2017 static int gen6_init_scratch(struct i915_address_space *vm) 2018 { 2019 struct drm_device *dev = vm->dev; 2020 int ret; 2021 2022 ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); 2023 if (ret) 2024 return ret; 2025 2026 vm->scratch_pt = alloc_pt(dev); 2027 if (IS_ERR(vm->scratch_pt)) { 2028 cleanup_scratch_page(dev, &vm->scratch_page); 2029 return PTR_ERR(vm->scratch_pt); 2030 } 2031 2032 gen6_initialize_pt(vm, vm->scratch_pt); 2033 2034 return 0; 2035 } 2036 2037 static void gen6_free_scratch(struct i915_address_space *vm) 2038 { 2039 struct drm_device *dev = vm->dev; 2040 2041 free_pt(dev, vm->scratch_pt); 2042 cleanup_scratch_page(dev, &vm->scratch_page); 2043 } 2044 2045 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 2046 { 2047 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 2048 struct i915_page_directory *pd = &ppgtt->pd; 2049 struct drm_device *dev = vm->dev; 2050 struct i915_page_table *pt; 2051 uint32_t pde; 2052 2053 drm_mm_remove_node(&ppgtt->node); 2054 2055 gen6_for_all_pdes(pt, pd, pde) 2056 if (pt != vm->scratch_pt) 2057 free_pt(dev, pt); 2058 2059 gen6_free_scratch(vm); 2060 } 2061 2062 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 2063 { 2064 struct i915_address_space *vm = &ppgtt->base; 2065 struct drm_device *dev = ppgtt->base.dev; 2066 struct drm_i915_private *dev_priv = to_i915(dev); 2067 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2068 bool retried = false; 2069 int ret; 2070 2071 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 2072 * allocator works in address space sizes, so it's multiplied by page 2073 * size. We allocate at the top of the GTT to avoid fragmentation. 
2074 */ 2075 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 2076 2077 ret = gen6_init_scratch(vm); 2078 if (ret) 2079 return ret; 2080 2081 alloc: 2082 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2083 &ppgtt->node, GEN6_PD_SIZE, 2084 GEN6_PD_ALIGN, 0, 2085 0, ggtt->base.total, 2086 DRM_MM_TOPDOWN); 2087 if (ret == -ENOSPC && !retried) { 2088 ret = i915_gem_evict_something(&ggtt->base, 2089 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2090 I915_CACHE_NONE, 2091 0, ggtt->base.total, 2092 0); 2093 if (ret) 2094 goto err_out; 2095 2096 retried = true; 2097 goto alloc; 2098 } 2099 2100 if (ret) 2101 goto err_out; 2102 2103 2104 if (ppgtt->node.start < ggtt->mappable_end) 2105 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2106 2107 return 0; 2108 2109 err_out: 2110 gen6_free_scratch(vm); 2111 return ret; 2112 } 2113 2114 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2115 { 2116 return gen6_ppgtt_allocate_page_directories(ppgtt); 2117 } 2118 2119 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2120 uint64_t start, uint64_t length) 2121 { 2122 struct i915_page_table *unused; 2123 uint32_t pde; 2124 2125 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2126 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2127 } 2128 2129 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2130 { 2131 struct drm_device *dev = ppgtt->base.dev; 2132 struct drm_i915_private *dev_priv = to_i915(dev); 2133 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2134 int ret; 2135 2136 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2137 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) 2138 ppgtt->switch_mm = gen6_mm_switch; 2139 else if (IS_HASWELL(dev_priv)) 2140 ppgtt->switch_mm = hsw_mm_switch; 2141 else if (IS_GEN7(dev_priv)) 2142 ppgtt->switch_mm = gen7_mm_switch; 2143 else 2144 BUG(); 2145 2146 ret = gen6_ppgtt_alloc(ppgtt); 2147 if (ret) 2148 return ret; 2149 2150 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2151 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2152 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2153 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2154 ppgtt->base.bind_vma = ppgtt_bind_vma; 2155 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2156 ppgtt->base.start = 0; 2157 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2158 ppgtt->debug_dump = gen6_dump_ppgtt; 2159 2160 ppgtt->pd.base.ggtt_offset = 2161 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2162 2163 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2164 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2165 2166 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2167 2168 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2169 2170 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2171 ppgtt->node.size >> 20, 2172 ppgtt->node.start / PAGE_SIZE); 2173 2174 DRM_DEBUG("Adding PPGTT at offset %x\n", 2175 ppgtt->pd.base.ggtt_offset << 10); 2176 2177 return 0; 2178 } 2179 2180 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2181 struct drm_i915_private *dev_priv) 2182 { 2183 ppgtt->base.dev = &dev_priv->drm; 2184 2185 if (INTEL_INFO(dev_priv)->gen < 8) 2186 return gen6_ppgtt_init(ppgtt); 2187 else 2188 return gen8_ppgtt_init(ppgtt); 2189 } 2190 2191 static void i915_address_space_init(struct i915_address_space *vm, 2192 struct drm_i915_private *dev_priv, 2193 const char *name) 2194 { 2195 i915_gem_timeline_init(dev_priv, &vm->timeline, name); 2196 drm_mm_init(&vm->mm, vm->start, vm->total); 2197 INIT_LIST_HEAD(&vm->active_list); 2198 
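	/*
	 * Each address space keeps three VMA lists: active_list for VMAs with
	 * outstanding GPU work, inactive_list for VMAs that are bound but idle
	 * (VMAs migrate there in i915_vma_retire() below), and unbound_list
	 * for VMAs that have been created but not yet given space in the GTT
	 * (see __i915_vma_create() below).
	 */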
INIT_LIST_HEAD(&vm->inactive_list); 2199 INIT_LIST_HEAD(&vm->unbound_list); 2200 list_add_tail(&vm->global_link, &dev_priv->vm_list); 2201 } 2202 2203 static void gtt_write_workarounds(struct drm_device *dev) 2204 { 2205 struct drm_i915_private *dev_priv = to_i915(dev); 2206 2207 /* This function is for gtt related workarounds. This function is 2208 * called on driver load and after a GPU reset, so you can place 2209 * workarounds here even if they get overwritten by GPU reset. 2210 */ 2211 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2212 if (IS_BROADWELL(dev_priv)) 2213 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2214 else if (IS_CHERRYVIEW(dev_priv)) 2215 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2216 else if (IS_SKYLAKE(dev_priv)) 2217 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2218 else if (IS_BROXTON(dev_priv)) 2219 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2220 } 2221 2222 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2223 struct drm_i915_private *dev_priv, 2224 struct drm_i915_file_private *file_priv, 2225 const char *name) 2226 { 2227 int ret; 2228 2229 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2230 if (ret == 0) { 2231 kref_init(&ppgtt->ref); 2232 i915_address_space_init(&ppgtt->base, dev_priv, name); 2233 ppgtt->base.file = file_priv; 2234 } 2235 2236 return ret; 2237 } 2238 2239 int i915_ppgtt_init_hw(struct drm_device *dev) 2240 { 2241 struct drm_i915_private *dev_priv = to_i915(dev); 2242 2243 gtt_write_workarounds(dev); 2244 2245 /* In the case of execlists, PPGTT is enabled by the context descriptor 2246 * and the PDPs are contained within the context itself. We don't 2247 * need to do anything here. */ 2248 if (i915.enable_execlists) 2249 return 0; 2250 2251 if (!USES_PPGTT(dev)) 2252 return 0; 2253 2254 if (IS_GEN6(dev_priv)) 2255 gen6_ppgtt_enable(dev); 2256 else if (IS_GEN7(dev_priv)) 2257 gen7_ppgtt_enable(dev); 2258 else if (INTEL_INFO(dev)->gen >= 8) 2259 gen8_ppgtt_enable(dev); 2260 else 2261 MISSING_CASE(INTEL_INFO(dev)->gen); 2262 2263 return 0; 2264 } 2265 2266 struct i915_hw_ppgtt * 2267 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2268 struct drm_i915_file_private *fpriv, 2269 const char *name) 2270 { 2271 struct i915_hw_ppgtt *ppgtt; 2272 int ret; 2273 2274 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2275 if (!ppgtt) 2276 return ERR_PTR(-ENOMEM); 2277 2278 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); 2279 if (ret) { 2280 kfree(ppgtt); 2281 return ERR_PTR(ret); 2282 } 2283 2284 trace_i915_ppgtt_create(&ppgtt->base); 2285 2286 return ppgtt; 2287 } 2288 2289 void i915_ppgtt_release(struct kref *kref) 2290 { 2291 struct i915_hw_ppgtt *ppgtt = 2292 container_of(kref, struct i915_hw_ppgtt, ref); 2293 2294 trace_i915_ppgtt_release(&ppgtt->base); 2295 2296 /* vmas should already be unbound and destroyed */ 2297 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2298 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2299 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2300 2301 i915_gem_timeline_fini(&ppgtt->base.timeline); 2302 list_del(&ppgtt->base.global_link); 2303 drm_mm_takedown(&ppgtt->base.mm); 2304 2305 ppgtt->base.cleanup(&ppgtt->base); 2306 kfree(ppgtt); 2307 } 2308 2309 /* Certain Gen5 chipsets require require idling the GPU before 2310 * unmapping anything from the GTT when VT-d is enabled. 
2311 */ 2312 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2313 { 2314 #ifdef CONFIG_INTEL_IOMMU 2315 /* Query intel_iommu to see if we need the workaround. Presumably that 2316 * was loaded first. 2317 */ 2318 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2319 return true; 2320 #endif 2321 return false; 2322 } 2323 2324 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2325 { 2326 struct intel_engine_cs *engine; 2327 enum intel_engine_id id; 2328 2329 if (INTEL_INFO(dev_priv)->gen < 6) 2330 return; 2331 2332 for_each_engine(engine, dev_priv, id) { 2333 u32 fault_reg; 2334 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2335 if (fault_reg & RING_FAULT_VALID) { 2336 DRM_DEBUG_DRIVER("Unexpected fault\n" 2337 "\tAddr: 0x%08lx\n" 2338 "\tAddress space: %s\n" 2339 "\tSource ID: %d\n" 2340 "\tType: %d\n", 2341 fault_reg & PAGE_MASK, 2342 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2343 RING_FAULT_SRCID(fault_reg), 2344 RING_FAULT_FAULT_TYPE(fault_reg)); 2345 I915_WRITE(RING_FAULT_REG(engine), 2346 fault_reg & ~RING_FAULT_VALID); 2347 } 2348 } 2349 2350 /* Engine specific init may not have been done till this point. */ 2351 if (dev_priv->engine[RCS]) 2352 POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS])); 2353 } 2354 2355 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2356 { 2357 if (INTEL_INFO(dev_priv)->gen < 6) { 2358 intel_gtt_chipset_flush(); 2359 } else { 2360 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2361 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2362 } 2363 } 2364 2365 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2366 { 2367 struct drm_i915_private *dev_priv = to_i915(dev); 2368 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2369 2370 /* Don't bother messing with faults pre GEN6 as we have little 2371 * documentation supporting that it's a good idea. 
2372 */ 2373 if (INTEL_INFO(dev)->gen < 6) 2374 return; 2375 2376 i915_check_and_clear_faults(dev_priv); 2377 2378 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 2379 2380 i915_ggtt_flush(dev_priv); 2381 } 2382 2383 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, 2384 struct sg_table *pages) 2385 { 2386 if (dma_map_sg(&obj->base.dev->pdev->dev, 2387 pages->sgl, pages->nents, 2388 PCI_DMA_BIDIRECTIONAL)) 2389 return 0; 2390 2391 return -ENOSPC; 2392 } 2393 2394 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2395 { 2396 writeq(pte, addr); 2397 } 2398 2399 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2400 dma_addr_t addr, 2401 uint64_t offset, 2402 enum i915_cache_level level, 2403 u32 unused) 2404 { 2405 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2406 gen8_pte_t __iomem *pte = 2407 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2408 (offset >> PAGE_SHIFT); 2409 2410 gen8_set_pte(pte, gen8_pte_encode(addr, level)); 2411 2412 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2413 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2414 } 2415 2416 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2417 struct sg_table *st, 2418 uint64_t start, 2419 enum i915_cache_level level, u32 unused) 2420 { 2421 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2422 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2423 struct sgt_iter sgt_iter; 2424 gen8_pte_t __iomem *gtt_entries; 2425 gen8_pte_t gtt_entry; 2426 dma_addr_t addr; 2427 int i = 0; 2428 2429 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2430 2431 for_each_sgt_dma(addr, sgt_iter, st) { 2432 gtt_entry = gen8_pte_encode(addr, level); 2433 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2434 } 2435 2436 /* 2437 * XXX: This serves as a posting read to make sure that the PTE has 2438 * actually been updated. There is some concern that even though 2439 * registers and PTEs are within the same BAR that they are potentially 2440 * of NUMA access patterns. Therefore, even with the way we assume 2441 * hardware should work, we must keep this posting read for paranoia. 2442 */ 2443 if (i != 0) 2444 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2445 2446 /* This next bit makes the above posting read even more important. We 2447 * want to flush the TLBs only after we're certain all the PTE updates 2448 * have finished.
2449 */ 2450 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2451 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2452 } 2453 2454 struct insert_entries { 2455 struct i915_address_space *vm; 2456 struct sg_table *st; 2457 uint64_t start; 2458 enum i915_cache_level level; 2459 u32 flags; 2460 }; 2461 2462 static int gen8_ggtt_insert_entries__cb(void *_arg) 2463 { 2464 struct insert_entries *arg = _arg; 2465 gen8_ggtt_insert_entries(arg->vm, arg->st, 2466 arg->start, arg->level, arg->flags); 2467 return 0; 2468 } 2469 2470 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2471 struct sg_table *st, 2472 uint64_t start, 2473 enum i915_cache_level level, 2474 u32 flags) 2475 { 2476 struct insert_entries arg = { vm, st, start, level, flags }; 2477 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2478 } 2479 2480 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2481 dma_addr_t addr, 2482 uint64_t offset, 2483 enum i915_cache_level level, 2484 u32 flags) 2485 { 2486 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2487 gen6_pte_t __iomem *pte = 2488 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2489 (offset >> PAGE_SHIFT); 2490 2491 iowrite32(vm->pte_encode(addr, level, flags), pte); 2492 2493 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2494 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2495 } 2496 2497 /* 2498 * Binds an object into the global gtt with the specified cache level. The object 2499 * will be accessible to the GPU via commands whose operands reference offsets 2500 * within the global GTT as well as accessible by the GPU through the GMADR 2501 * mapped BAR (dev_priv->mm.gtt->gtt). 2502 */ 2503 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2504 struct sg_table *st, 2505 uint64_t start, 2506 enum i915_cache_level level, u32 flags) 2507 { 2508 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2509 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2510 struct sgt_iter sgt_iter; 2511 gen6_pte_t __iomem *gtt_entries; 2512 gen6_pte_t gtt_entry; 2513 dma_addr_t addr; 2514 int i = 0; 2515 2516 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2517 2518 for_each_sgt_dma(addr, sgt_iter, st) { 2519 gtt_entry = vm->pte_encode(addr, level, flags); 2520 iowrite32(gtt_entry, &gtt_entries[i++]); 2521 } 2522 2523 /* XXX: This serves as a posting read to make sure that the PTE has 2524 * actually been updated. There is some concern that even though 2525 * registers and PTEs are within the same BAR that they are potentially 2526 * of NUMA access patterns. Therefore, even with the way we assume 2527 * hardware should work, we must keep this posting read for paranoia. 2528 */ 2529 if (i != 0) 2530 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2531 2532 /* This next bit makes the above posting read even more important. We 2533 * want to flush the TLBs only after we're certain all the PTE updates 2534 * have finished.
2535 */ 2536 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2537 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2538 } 2539 2540 static void nop_clear_range(struct i915_address_space *vm, 2541 uint64_t start, uint64_t length) 2542 { 2543 } 2544 2545 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2546 uint64_t start, uint64_t length) 2547 { 2548 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2549 unsigned first_entry = start >> PAGE_SHIFT; 2550 unsigned num_entries = length >> PAGE_SHIFT; 2551 gen8_pte_t scratch_pte, __iomem *gtt_base = 2552 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2553 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2554 int i; 2555 2556 if (WARN(num_entries > max_entries, 2557 "First entry = %d; Num entries = %d (max=%d)\n", 2558 first_entry, num_entries, max_entries)) 2559 num_entries = max_entries; 2560 2561 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 2562 I915_CACHE_LLC); 2563 for (i = 0; i < num_entries; i++) 2564 gen8_set_pte(&gtt_base[i], scratch_pte); 2565 readl(gtt_base); 2566 } 2567 2568 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2569 uint64_t start, 2570 uint64_t length) 2571 { 2572 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2573 unsigned first_entry = start >> PAGE_SHIFT; 2574 unsigned num_entries = length >> PAGE_SHIFT; 2575 gen6_pte_t scratch_pte, __iomem *gtt_base = 2576 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2577 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2578 int i; 2579 2580 if (WARN(num_entries > max_entries, 2581 "First entry = %d; Num entries = %d (max=%d)\n", 2582 first_entry, num_entries, max_entries)) 2583 num_entries = max_entries; 2584 2585 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2586 I915_CACHE_LLC, 0); 2587 2588 for (i = 0; i < num_entries; i++) 2589 iowrite32(scratch_pte, &gtt_base[i]); 2590 readl(gtt_base); 2591 } 2592 2593 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2594 dma_addr_t addr, 2595 uint64_t offset, 2596 enum i915_cache_level cache_level, 2597 u32 unused) 2598 { 2599 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2600 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2601 2602 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2603 } 2604 2605 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2606 struct sg_table *pages, 2607 uint64_t start, 2608 enum i915_cache_level cache_level, u32 unused) 2609 { 2610 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2611 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2612 2613 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2614 2615 } 2616 2617 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2618 uint64_t start, 2619 uint64_t length) 2620 { 2621 intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); 2622 } 2623 2624 static int ggtt_bind_vma(struct i915_vma *vma, 2625 enum i915_cache_level cache_level, 2626 u32 flags) 2627 { 2628 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2629 struct drm_i915_gem_object *obj = vma->obj; 2630 u32 pte_flags = 0; 2631 int ret; 2632 2633 ret = i915_get_ggtt_vma_pages(vma); 2634 if (ret) 2635 return ret; 2636 2637 /* Currently applicable only to VLV */ 2638 if (obj->gt_ro) 2639 pte_flags |= PTE_READ_ONLY; 2640 2641 intel_runtime_pm_get(i915); 2642 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2643 cache_level, pte_flags); 2644 intel_runtime_pm_put(i915); 2645 2646 /* 2647 * Without aliasing PPGTT there's no difference between 2648 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2649 * upgrade to both bound if we bind either to avoid double-binding. 2650 */ 2651 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2652 2653 return 0; 2654 } 2655 2656 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2657 enum i915_cache_level cache_level, 2658 u32 flags) 2659 { 2660 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2661 u32 pte_flags; 2662 int ret; 2663 2664 ret = i915_get_ggtt_vma_pages(vma); 2665 if (ret) 2666 return ret; 2667 2668 /* Currently applicable only to VLV */ 2669 pte_flags = 0; 2670 if (vma->obj->gt_ro) 2671 pte_flags |= PTE_READ_ONLY; 2672 2673 2674 if (flags & I915_VMA_GLOBAL_BIND) { 2675 intel_runtime_pm_get(i915); 2676 vma->vm->insert_entries(vma->vm, 2677 vma->pages, vma->node.start, 2678 cache_level, pte_flags); 2679 intel_runtime_pm_put(i915); 2680 } 2681 2682 if (flags & I915_VMA_LOCAL_BIND) { 2683 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2684 appgtt->base.insert_entries(&appgtt->base, 2685 vma->pages, vma->node.start, 2686 cache_level, pte_flags); 2687 } 2688 2689 return 0; 2690 } 2691 2692 static void ggtt_unbind_vma(struct i915_vma *vma) 2693 { 2694 struct drm_i915_private *i915 = to_i915(vma->vm->dev); 2695 struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; 2696 const u64 size = min(vma->size, vma->node.size); 2697 2698 if (vma->flags & I915_VMA_GLOBAL_BIND) { 2699 intel_runtime_pm_get(i915); 2700 vma->vm->clear_range(vma->vm, 2701 vma->node.start, size); 2702 intel_runtime_pm_put(i915); 2703 } 2704 2705 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2706 appgtt->base.clear_range(&appgtt->base, 2707 vma->node.start, size); 2708 } 2709 2710 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, 2711 struct sg_table *pages) 2712 { 2713 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2714 struct device *kdev = &dev_priv->drm.pdev->dev; 2715 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2716 2717 if (unlikely(ggtt->do_idle_maps)) { 2718 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2719 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2720 /* Wait a bit, in hopes it avoids the hang */ 2721 udelay(10); 2722 } 2723 } 2724 2725 dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); 2726 } 2727 2728 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2729 unsigned long color, 2730 u64 *start, 2731 u64 *end) 2732 { 2733 if (node->color != color) 2734 *start += 4096; 2735 2736 node 
= list_first_entry_or_null(&node->node_list, 2737 struct drm_mm_node, 2738 node_list); 2739 if (node && node->allocated && node->color != color) 2740 *end -= 4096; 2741 } 2742 2743 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2744 { 2745 /* Let GEM Manage all of the aperture. 2746 * 2747 * However, leave one page at the end still bound to the scratch page. 2748 * There are a number of places where the hardware apparently prefetches 2749 * past the end of the object, and we've seen multiple hangs with the 2750 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2751 * aperture. One page should be enough to keep any prefetching inside 2752 * of the aperture. 2753 */ 2754 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2755 unsigned long hole_start, hole_end; 2756 struct i915_hw_ppgtt *ppgtt; 2757 struct drm_mm_node *entry; 2758 int ret; 2759 2760 ret = intel_vgt_balloon(dev_priv); 2761 if (ret) 2762 return ret; 2763 2764 /* Reserve a mappable slot for our lockless error capture */ 2765 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2766 &ggtt->error_capture, 2767 4096, 0, -1, 2768 0, ggtt->mappable_end, 2769 0, 0); 2770 if (ret) 2771 return ret; 2772 2773 /* Clear any non-preallocated blocks */ 2774 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2775 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2776 hole_start, hole_end); 2777 ggtt->base.clear_range(&ggtt->base, hole_start, 2778 hole_end - hole_start); 2779 } 2780 2781 /* And finally clear the reserved guard page */ 2782 ggtt->base.clear_range(&ggtt->base, 2783 ggtt->base.total - PAGE_SIZE, PAGE_SIZE); 2784 2785 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2786 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2787 if (!ppgtt) { 2788 ret = -ENOMEM; 2789 goto err; 2790 } 2791 2792 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2793 if (ret) 2794 goto err_ppgtt; 2795 2796 if (ppgtt->base.allocate_va_range) { 2797 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2798 ppgtt->base.total); 2799 if (ret) 2800 goto err_ppgtt_cleanup; 2801 } 2802 2803 ppgtt->base.clear_range(&ppgtt->base, 2804 ppgtt->base.start, 2805 ppgtt->base.total); 2806 2807 dev_priv->mm.aliasing_ppgtt = ppgtt; 2808 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2809 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2810 } 2811 2812 return 0; 2813 2814 err_ppgtt_cleanup: 2815 ppgtt->base.cleanup(&ppgtt->base); 2816 err_ppgtt: 2817 kfree(ppgtt); 2818 err: 2819 drm_mm_remove_node(&ggtt->error_capture); 2820 return ret; 2821 } 2822 2823 /** 2824 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2825 * @dev_priv: i915 device 2826 */ 2827 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2828 { 2829 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2830 2831 if (dev_priv->mm.aliasing_ppgtt) { 2832 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2833 ppgtt->base.cleanup(&ppgtt->base); 2834 kfree(ppgtt); 2835 } 2836 2837 i915_gem_cleanup_stolen(&dev_priv->drm); 2838 2839 if (drm_mm_node_allocated(&ggtt->error_capture)) 2840 drm_mm_remove_node(&ggtt->error_capture); 2841 2842 if (drm_mm_initialized(&ggtt->base.mm)) { 2843 intel_vgt_deballoon(dev_priv); 2844 2845 drm_mm_takedown(&ggtt->base.mm); 2846 list_del(&ggtt->base.global_link); 2847 } 2848 2849 ggtt->base.cleanup(&ggtt->base); 2850 2851 arch_phys_wc_del(ggtt->mtrr); 2852 io_mapping_fini(&ggtt->mappable); 2853 } 2854 2855 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2856 { 2857 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 
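	/*
	 * The GGMS field of the GMCH control word gives the amount of memory
	 * reserved for the GTT itself, in MiB. For example, GGMS == 2 means
	 * 2MiB of PTEs; at 4 bytes per gen6 PTE that is 512K entries, which
	 * gen6_gmch_probe() below turns into a 512K * 4KiB = 2GiB GGTT
	 * address space via (size / sizeof(gen6_pte_t)) << PAGE_SHIFT.
	 */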
2858 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2859 return snb_gmch_ctl << 20; 2860 } 2861 2862 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2863 { 2864 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2865 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2866 if (bdw_gmch_ctl) 2867 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2868 2869 #ifdef CONFIG_X86_32 2870 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2871 if (bdw_gmch_ctl > 4) 2872 bdw_gmch_ctl = 4; 2873 #endif 2874 2875 return bdw_gmch_ctl << 20; 2876 } 2877 2878 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2879 { 2880 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2881 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2882 2883 if (gmch_ctrl) 2884 return 1 << (20 + gmch_ctrl); 2885 2886 return 0; 2887 } 2888 2889 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2890 { 2891 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2892 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2893 return snb_gmch_ctl << 25; /* 32 MB units */ 2894 } 2895 2896 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2897 { 2898 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2899 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2900 return bdw_gmch_ctl << 25; /* 32 MB units */ 2901 } 2902 2903 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2904 { 2905 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2906 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2907 2908 /* 2909 * 0x0 to 0x10: 32MB increments starting at 0MB 2910 * 0x11 to 0x16: 4MB increments starting at 8MB 2911 * 0x17 to 0x1d: 4MB increments start at 36MB 2912 */ 2913 if (gmch_ctrl < 0x11) 2914 return gmch_ctrl << 25; 2915 else if (gmch_ctrl < 0x17) 2916 return (gmch_ctrl - 0x11 + 2) << 22; 2917 else 2918 return (gmch_ctrl - 0x17 + 9) << 22; 2919 } 2920 2921 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2922 { 2923 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2924 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2925 2926 if (gen9_gmch_ctl < 0xf0) 2927 return gen9_gmch_ctl << 25; /* 32 MB units */ 2928 else 2929 /* 4MB increments starting at 0xf0 for 4MB */ 2930 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2931 } 2932 2933 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2934 { 2935 struct pci_dev *pdev = ggtt->base.dev->pdev; 2936 phys_addr_t phys_addr; 2937 int ret; 2938 2939 /* For Modern GENs the PTEs and register space are split in the BAR */ 2940 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2941 2942 /* 2943 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2944 * dropped. For WC mappings in general we have 64 byte burst writes 2945 * when the WC buffer is flushed, so we can't use it, but have to 2946 * resort to an uncached mapping. The WC issue is easily caught by the 2947 * readback check when writing GTT PTE entries. 2948 */ 2949 if (IS_BROXTON(to_i915(ggtt->base.dev))) 2950 ggtt->gsm = ioremap_nocache(phys_addr, size); 2951 else 2952 ggtt->gsm = ioremap_wc(phys_addr, size); 2953 if (!ggtt->gsm) { 2954 DRM_ERROR("Failed to map the ggtt page table\n"); 2955 return -ENOMEM; 2956 } 2957 2958 ret = setup_scratch_page(ggtt->base.dev, 2959 &ggtt->base.scratch_page, 2960 GFP_DMA32); 2961 if (ret) { 2962 DRM_ERROR("Scratch setup failed\n"); 2963 /* iounmap will also get called at remove, but meh */ 2964 iounmap(ggtt->gsm); 2965 return ret; 2966 } 2967 2968 return 0; 2969 } 2970 2971 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2972 * bits. When using advanced contexts each context stores its own PAT, but 2973 * writing this data shouldn't be harmful even in those cases. 
*/ 2974 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2975 { 2976 uint64_t pat; 2977 2978 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2979 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2980 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2981 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2982 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2983 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2984 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2985 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2986 2987 if (!USES_PPGTT(dev_priv)) 2988 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2989 * so RTL will always use the value corresponding to 2990 * pat_sel = 000". 2991 * So let's disable cache for GGTT to avoid screen corruptions. 2992 * MOCS still can be used though. 2993 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2994 * before this patch, i.e. the same uncached + snooping access 2995 * like on gen6/7 seems to be in effect. 2996 * - So this just fixes blitter/render access. Again it looks 2997 * like it's not just uncached access, but uncached + snooping. 2998 * So we can still hold onto all our assumptions wrt cpu 2999 * clflushing on LLC machines. 3000 */ 3001 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 3002 3003 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 3004 * write would work. */ 3005 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3006 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3007 } 3008 3009 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 3010 { 3011 uint64_t pat; 3012 3013 /* 3014 * Map WB on BDW to snooped on CHV. 3015 * 3016 * Only the snoop bit has meaning for CHV, the rest is 3017 * ignored. 3018 * 3019 * The hardware will never snoop for certain types of accesses: 3020 * - CPU GTT (GMADR->GGTT->no snoop->memory) 3021 * - PPGTT page tables 3022 * - some other special cycles 3023 * 3024 * As with BDW, we also need to consider the following for GT accesses: 3025 * "For GGTT, there is NO pat_sel[2:0] from the entry, 3026 * so RTL will always use the value corresponding to 3027 * pat_sel = 000". 3028 * Which means we must set the snoop bit in PAT entry 0 3029 * in order to keep the global status page working. 
3030 */ 3031 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3032 GEN8_PPAT(1, 0) | 3033 GEN8_PPAT(2, 0) | 3034 GEN8_PPAT(3, 0) | 3035 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3036 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3037 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3038 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3039 3040 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3041 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3042 } 3043 3044 static void gen6_gmch_remove(struct i915_address_space *vm) 3045 { 3046 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 3047 3048 iounmap(ggtt->gsm); 3049 cleanup_scratch_page(vm->dev, &vm->scratch_page); 3050 } 3051 3052 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3053 { 3054 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3055 struct pci_dev *pdev = dev_priv->drm.pdev; 3056 unsigned int size; 3057 u16 snb_gmch_ctl; 3058 3059 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3060 ggtt->mappable_base = pci_resource_start(pdev, 2); 3061 ggtt->mappable_end = pci_resource_len(pdev, 2); 3062 3063 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3064 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3065 3066 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3067 3068 if (INTEL_GEN(dev_priv) >= 9) { 3069 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3070 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3071 } else if (IS_CHERRYVIEW(dev_priv)) { 3072 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3073 size = chv_get_total_gtt_size(snb_gmch_ctl); 3074 } else { 3075 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3076 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3077 } 3078 3079 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3080 3081 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3082 chv_setup_private_ppat(dev_priv); 3083 else 3084 bdw_setup_private_ppat(dev_priv); 3085 3086 ggtt->base.cleanup = gen6_gmch_remove; 3087 ggtt->base.bind_vma = ggtt_bind_vma; 3088 ggtt->base.unbind_vma = ggtt_unbind_vma; 3089 ggtt->base.insert_page = gen8_ggtt_insert_page; 3090 ggtt->base.clear_range = nop_clear_range; 3091 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3092 ggtt->base.clear_range = gen8_ggtt_clear_range; 3093 3094 ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3095 if (IS_CHERRYVIEW(dev_priv)) 3096 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3097 3098 return ggtt_probe_common(ggtt, size); 3099 } 3100 3101 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3102 { 3103 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3104 struct pci_dev *pdev = dev_priv->drm.pdev; 3105 unsigned int size; 3106 u16 snb_gmch_ctl; 3107 3108 ggtt->mappable_base = pci_resource_start(pdev, 2); 3109 ggtt->mappable_end = pci_resource_len(pdev, 2); 3110 3111 /* 64/512MB is the current min/max we actually know of, but this is just 3112 * a coarse sanity check. 
3113 */ 3114 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3115 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3116 return -ENXIO; 3117 } 3118 3119 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3120 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3121 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3122 3123 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3124 3125 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3126 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3127 3128 ggtt->base.clear_range = gen6_ggtt_clear_range; 3129 ggtt->base.insert_page = gen6_ggtt_insert_page; 3130 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3131 ggtt->base.bind_vma = ggtt_bind_vma; 3132 ggtt->base.unbind_vma = ggtt_unbind_vma; 3133 ggtt->base.cleanup = gen6_gmch_remove; 3134 3135 if (HAS_EDRAM(dev_priv)) 3136 ggtt->base.pte_encode = iris_pte_encode; 3137 else if (IS_HASWELL(dev_priv)) 3138 ggtt->base.pte_encode = hsw_pte_encode; 3139 else if (IS_VALLEYVIEW(dev_priv)) 3140 ggtt->base.pte_encode = byt_pte_encode; 3141 else if (INTEL_GEN(dev_priv) >= 7) 3142 ggtt->base.pte_encode = ivb_pte_encode; 3143 else 3144 ggtt->base.pte_encode = snb_pte_encode; 3145 3146 return ggtt_probe_common(ggtt, size); 3147 } 3148 3149 static void i915_gmch_remove(struct i915_address_space *vm) 3150 { 3151 intel_gmch_remove(); 3152 } 3153 3154 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3155 { 3156 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3157 int ret; 3158 3159 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3160 if (!ret) { 3161 DRM_ERROR("failed to set up gmch\n"); 3162 return -EIO; 3163 } 3164 3165 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3166 &ggtt->mappable_base, &ggtt->mappable_end); 3167 3168 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3169 ggtt->base.insert_page = i915_ggtt_insert_page; 3170 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3171 ggtt->base.clear_range = i915_ggtt_clear_range; 3172 ggtt->base.bind_vma = ggtt_bind_vma; 3173 ggtt->base.unbind_vma = ggtt_unbind_vma; 3174 ggtt->base.cleanup = i915_gmch_remove; 3175 3176 if (unlikely(ggtt->do_idle_maps)) 3177 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3178 3179 return 0; 3180 } 3181 3182 /** 3183 * i915_ggtt_probe_hw - Probe GGTT hardware location 3184 * @dev_priv: i915 device 3185 */ 3186 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3187 { 3188 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3189 int ret; 3190 3191 ggtt->base.dev = &dev_priv->drm; 3192 3193 if (INTEL_GEN(dev_priv) <= 5) 3194 ret = i915_gmch_probe(ggtt); 3195 else if (INTEL_GEN(dev_priv) < 8) 3196 ret = gen6_gmch_probe(ggtt); 3197 else 3198 ret = gen8_gmch_probe(ggtt); 3199 if (ret) 3200 return ret; 3201 3202 if ((ggtt->base.total - 1) >> 32) { 3203 DRM_ERROR("We never expected a Global GTT with more than 32bits" 3204 " of address space! Found %lldM!\n", 3205 ggtt->base.total >> 20); 3206 ggtt->base.total = 1ULL << 32; 3207 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3208 } 3209 3210 if (ggtt->mappable_end > ggtt->base.total) { 3211 DRM_ERROR("mappable aperture extends past end of GGTT," 3212 " aperture=%llx, total=%llx\n", 3213 ggtt->mappable_end, ggtt->base.total); 3214 ggtt->mappable_end = ggtt->base.total; 3215 } 3216 3217 /* GMADR is the PCI mmio aperture into the global GTT. 
*/ 3218 DRM_INFO("Memory usable by graphics device = %lluM\n", 3219 ggtt->base.total >> 20); 3220 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3221 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3222 #ifdef CONFIG_INTEL_IOMMU 3223 if (intel_iommu_gfx_mapped) 3224 DRM_INFO("VT-d active for gfx access\n"); 3225 #endif 3226 3227 return 0; 3228 } 3229 3230 /** 3231 * i915_ggtt_init_hw - Initialize GGTT hardware 3232 * @dev_priv: i915 device 3233 */ 3234 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3235 { 3236 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3237 int ret; 3238 3239 INIT_LIST_HEAD(&dev_priv->vm_list); 3240 3241 /* Subtract the guard page before address space initialization to 3242 * shrink the range used by drm_mm. 3243 */ 3244 mutex_lock(&dev_priv->drm.struct_mutex); 3245 ggtt->base.total -= PAGE_SIZE; 3246 i915_address_space_init(&ggtt->base, dev_priv, "[global]"); 3247 ggtt->base.total += PAGE_SIZE; 3248 if (!HAS_LLC(dev_priv)) 3249 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3250 mutex_unlock(&dev_priv->drm.struct_mutex); 3251 3252 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3253 dev_priv->ggtt.mappable_base, 3254 dev_priv->ggtt.mappable_end)) { 3255 ret = -EIO; 3256 goto out_gtt_cleanup; 3257 } 3258 3259 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3260 3261 /* 3262 * Initialise stolen early so that we may reserve preallocated 3263 * objects for the BIOS to KMS transition. 3264 */ 3265 ret = i915_gem_init_stolen(&dev_priv->drm); 3266 if (ret) 3267 goto out_gtt_cleanup; 3268 3269 return 0; 3270 3271 out_gtt_cleanup: 3272 ggtt->base.cleanup(&ggtt->base); 3273 return ret; 3274 } 3275 3276 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3277 { 3278 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3279 return -EIO; 3280 3281 return 0; 3282 } 3283 3284 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3285 { 3286 struct drm_i915_private *dev_priv = to_i915(dev); 3287 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3288 struct drm_i915_gem_object *obj, *on; 3289 3290 i915_check_and_clear_faults(dev_priv); 3291 3292 /* First fill our portion of the GTT with scratch pages */ 3293 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); 3294 3295 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3296 3297 /* clflush objects bound into the GGTT and rebind them. 
*/ 3298 list_for_each_entry_safe(obj, on, 3299 &dev_priv->mm.bound_list, global_link) { 3300 bool ggtt_bound = false; 3301 struct i915_vma *vma; 3302 3303 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3304 if (vma->vm != &ggtt->base) 3305 continue; 3306 3307 if (!i915_vma_unbind(vma)) 3308 continue; 3309 3310 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3311 PIN_UPDATE)); 3312 ggtt_bound = true; 3313 } 3314 3315 if (ggtt_bound) 3316 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3317 } 3318 3319 ggtt->base.closed = false; 3320 3321 if (INTEL_INFO(dev)->gen >= 8) { 3322 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3323 chv_setup_private_ppat(dev_priv); 3324 else 3325 bdw_setup_private_ppat(dev_priv); 3326 3327 return; 3328 } 3329 3330 if (USES_PPGTT(dev)) { 3331 struct i915_address_space *vm; 3332 3333 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3334 /* TODO: Perhaps it shouldn't be gen6 specific */ 3335 3336 struct i915_hw_ppgtt *ppgtt; 3337 3338 if (i915_is_ggtt(vm)) 3339 ppgtt = dev_priv->mm.aliasing_ppgtt; 3340 else 3341 ppgtt = i915_vm_to_ppgtt(vm); 3342 3343 gen6_write_page_range(dev_priv, &ppgtt->pd, 3344 0, ppgtt->base.total); 3345 } 3346 } 3347 3348 i915_ggtt_flush(dev_priv); 3349 } 3350 3351 static void 3352 i915_vma_retire(struct i915_gem_active *active, 3353 struct drm_i915_gem_request *rq) 3354 { 3355 const unsigned int idx = rq->engine->id; 3356 struct i915_vma *vma = 3357 container_of(active, struct i915_vma, last_read[idx]); 3358 struct drm_i915_gem_object *obj = vma->obj; 3359 3360 GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); 3361 3362 i915_vma_clear_active(vma, idx); 3363 if (i915_vma_is_active(vma)) 3364 return; 3365 3366 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3367 if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) 3368 WARN_ON(i915_vma_unbind(vma)); 3369 3370 GEM_BUG_ON(!i915_gem_object_is_active(obj)); 3371 if (--obj->active_count) 3372 return; 3373 3374 /* Bump our place on the bound list to keep it roughly in LRU order 3375 * so that we don't steal from recently used but inactive objects 3376 * (unless we are forced to ofc!) 
3377 */ 3378 if (obj->bind_count) 3379 list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); 3380 3381 obj->mm.dirty = true; /* be paranoid */ 3382 3383 if (i915_gem_object_has_active_reference(obj)) { 3384 i915_gem_object_clear_active_reference(obj); 3385 i915_gem_object_put(obj); 3386 } 3387 } 3388 3389 static void 3390 i915_ggtt_retire__write(struct i915_gem_active *active, 3391 struct drm_i915_gem_request *request) 3392 { 3393 struct i915_vma *vma = 3394 container_of(active, struct i915_vma, last_write); 3395 3396 intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); 3397 } 3398 3399 void i915_vma_destroy(struct i915_vma *vma) 3400 { 3401 GEM_BUG_ON(vma->node.allocated); 3402 GEM_BUG_ON(i915_vma_is_active(vma)); 3403 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3404 GEM_BUG_ON(vma->fence); 3405 3406 list_del(&vma->vm_link); 3407 if (!i915_vma_is_ggtt(vma)) 3408 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 3409 3410 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 3411 } 3412 3413 void i915_vma_close(struct i915_vma *vma) 3414 { 3415 GEM_BUG_ON(i915_vma_is_closed(vma)); 3416 vma->flags |= I915_VMA_CLOSED; 3417 3418 list_del(&vma->obj_link); 3419 rb_erase(&vma->obj_node, &vma->obj->vma_tree); 3420 3421 if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) 3422 WARN_ON(i915_vma_unbind(vma)); 3423 } 3424 3425 static inline long vma_compare(struct i915_vma *vma, 3426 struct i915_address_space *vm, 3427 const struct i915_ggtt_view *view) 3428 { 3429 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3430 3431 if (vma->vm != vm) 3432 return vma->vm - vm; 3433 3434 if (!view) 3435 return vma->ggtt_view.type; 3436 3437 if (vma->ggtt_view.type != view->type) 3438 return vma->ggtt_view.type - view->type; 3439 3440 return memcmp(&vma->ggtt_view.params, 3441 &view->params, 3442 sizeof(view->params)); 3443 } 3444 3445 static struct i915_vma * 3446 __i915_vma_create(struct drm_i915_gem_object *obj, 3447 struct i915_address_space *vm, 3448 const struct i915_ggtt_view *view) 3449 { 3450 struct i915_vma *vma; 3451 struct rb_node *rb, **p; 3452 int i; 3453 3454 GEM_BUG_ON(vm->closed); 3455 3456 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); 3457 if (vma == NULL) 3458 return ERR_PTR(-ENOMEM); 3459 3460 INIT_LIST_HEAD(&vma->exec_list); 3461 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3462 init_request_active(&vma->last_read[i], i915_vma_retire); 3463 init_request_active(&vma->last_write, 3464 i915_is_ggtt(vm) ? 
i915_ggtt_retire__write : NULL); 3465 init_request_active(&vma->last_fence, NULL); 3466 list_add(&vma->vm_link, &vm->unbound_list); 3467 vma->vm = vm; 3468 vma->obj = obj; 3469 vma->size = obj->base.size; 3470 3471 if (view) { 3472 vma->ggtt_view = *view; 3473 if (view->type == I915_GGTT_VIEW_PARTIAL) { 3474 vma->size = view->params.partial.size; 3475 vma->size <<= PAGE_SHIFT; 3476 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3477 vma->size = 3478 intel_rotation_info_size(&view->params.rotated); 3479 vma->size <<= PAGE_SHIFT; 3480 } 3481 } 3482 3483 if (i915_is_ggtt(vm)) { 3484 vma->flags |= I915_VMA_GGTT; 3485 list_add(&vma->obj_link, &obj->vma_list); 3486 } else { 3487 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3488 list_add_tail(&vma->obj_link, &obj->vma_list); 3489 } 3490 3491 rb = NULL; 3492 p = &obj->vma_tree.rb_node; 3493 while (*p) { 3494 struct i915_vma *pos; 3495 3496 rb = *p; 3497 pos = rb_entry(rb, struct i915_vma, obj_node); 3498 if (vma_compare(pos, vm, view) < 0) 3499 p = &rb->rb_right; 3500 else 3501 p = &rb->rb_left; 3502 } 3503 rb_link_node(&vma->obj_node, rb, p); 3504 rb_insert_color(&vma->obj_node, &obj->vma_tree); 3505 3506 return vma; 3507 } 3508 3509 struct i915_vma * 3510 i915_vma_create(struct drm_i915_gem_object *obj, 3511 struct i915_address_space *vm, 3512 const struct i915_ggtt_view *view) 3513 { 3514 lockdep_assert_held(&obj->base.dev->struct_mutex); 3515 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3516 GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); 3517 3518 return __i915_vma_create(obj, vm, view); 3519 } 3520 3521 struct i915_vma * 3522 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3523 struct i915_address_space *vm, 3524 const struct i915_ggtt_view *view) 3525 { 3526 struct rb_node *rb; 3527 3528 rb = obj->vma_tree.rb_node; 3529 while (rb) { 3530 struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); 3531 long cmp; 3532 3533 cmp = vma_compare(vma, vm, view); 3534 if (cmp == 0) 3535 return vma; 3536 3537 if (cmp < 0) 3538 rb = rb->rb_right; 3539 else 3540 rb = rb->rb_left; 3541 } 3542 3543 return NULL; 3544 } 3545 3546 struct i915_vma * 3547 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3548 struct i915_address_space *vm, 3549 const struct i915_ggtt_view *view) 3550 { 3551 struct i915_vma *vma; 3552 3553 lockdep_assert_held(&obj->base.dev->struct_mutex); 3554 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3555 3556 vma = i915_gem_obj_to_vma(obj, vm, view); 3557 if (!vma) { 3558 vma = __i915_vma_create(obj, vm, view); 3559 GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); 3560 } 3561 3562 GEM_BUG_ON(i915_vma_is_closed(vma)); 3563 return vma; 3564 } 3565 3566 static struct scatterlist * 3567 rotate_pages(const dma_addr_t *in, unsigned int offset, 3568 unsigned int width, unsigned int height, 3569 unsigned int stride, 3570 struct sg_table *st, struct scatterlist *sg) 3571 { 3572 unsigned int column, row; 3573 unsigned int src_idx; 3574 3575 for (column = 0; column < width; column++) { 3576 src_idx = stride * (height - 1) + column; 3577 for (row = 0; row < height; row++) { 3578 st->nents++; 3579 /* We don't need the pages, but need to initialize 3580 * the entries so the sg list can be happily traversed. 3581 * The only thing we need are DMA addresses. 
3582 */ 3583 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3584 sg_dma_address(sg) = in[offset + src_idx]; 3585 sg_dma_len(sg) = PAGE_SIZE; 3586 sg = sg_next(sg); 3587 src_idx -= stride; 3588 } 3589 } 3590 3591 return sg; 3592 } 3593 3594 static struct sg_table * 3595 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3596 struct drm_i915_gem_object *obj) 3597 { 3598 const size_t n_pages = obj->base.size / PAGE_SIZE; 3599 unsigned int size = intel_rotation_info_size(rot_info); 3600 struct sgt_iter sgt_iter; 3601 dma_addr_t dma_addr; 3602 unsigned long i; 3603 dma_addr_t *page_addr_list; 3604 struct sg_table *st; 3605 struct scatterlist *sg; 3606 int ret = -ENOMEM; 3607 3608 /* Allocate a temporary list of source pages for random access. */ 3609 page_addr_list = drm_malloc_gfp(n_pages, 3610 sizeof(dma_addr_t), 3611 GFP_TEMPORARY); 3612 if (!page_addr_list) 3613 return ERR_PTR(ret); 3614 3615 /* Allocate target SG list. */ 3616 st = kmalloc(sizeof(*st), GFP_KERNEL); 3617 if (!st) 3618 goto err_st_alloc; 3619 3620 ret = sg_alloc_table(st, size, GFP_KERNEL); 3621 if (ret) 3622 goto err_sg_alloc; 3623 3624 /* Populate source page list from the object. */ 3625 i = 0; 3626 for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) 3627 page_addr_list[i++] = dma_addr; 3628 3629 GEM_BUG_ON(i != n_pages); 3630 st->nents = 0; 3631 sg = st->sgl; 3632 3633 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3634 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3635 rot_info->plane[i].width, rot_info->plane[i].height, 3636 rot_info->plane[i].stride, st, sg); 3637 } 3638 3639 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3640 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3641 3642 drm_free_large(page_addr_list); 3643 3644 return st; 3645 3646 err_sg_alloc: 3647 kfree(st); 3648 err_st_alloc: 3649 drm_free_large(page_addr_list); 3650 3651 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! 
(%ux%u tiles, %u pages)\n", 3652 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3653 3654 return ERR_PTR(ret); 3655 } 3656 3657 static struct sg_table * 3658 intel_partial_pages(const struct i915_ggtt_view *view, 3659 struct drm_i915_gem_object *obj) 3660 { 3661 struct sg_table *st; 3662 struct scatterlist *sg, *iter; 3663 unsigned int count = view->params.partial.size; 3664 unsigned int offset; 3665 int ret = -ENOMEM; 3666 3667 st = kmalloc(sizeof(*st), GFP_KERNEL); 3668 if (!st) 3669 goto err_st_alloc; 3670 3671 ret = sg_alloc_table(st, count, GFP_KERNEL); 3672 if (ret) 3673 goto err_sg_alloc; 3674 3675 iter = i915_gem_object_get_sg(obj, 3676 view->params.partial.offset, 3677 &offset); 3678 GEM_BUG_ON(!iter); 3679 3680 sg = st->sgl; 3681 st->nents = 0; 3682 do { 3683 unsigned int len; 3684 3685 len = min(iter->length - (offset << PAGE_SHIFT), 3686 count << PAGE_SHIFT); 3687 sg_set_page(sg, NULL, len, 0); 3688 sg_dma_address(sg) = 3689 sg_dma_address(iter) + (offset << PAGE_SHIFT); 3690 sg_dma_len(sg) = len; 3691 3692 st->nents++; 3693 count -= len >> PAGE_SHIFT; 3694 if (count == 0) { 3695 sg_mark_end(sg); 3696 return st; 3697 } 3698 3699 sg = __sg_next(sg); 3700 iter = __sg_next(iter); 3701 offset = 0; 3702 } while (1); 3703 3704 err_sg_alloc: 3705 kfree(st); 3706 err_st_alloc: 3707 return ERR_PTR(ret); 3708 } 3709 3710 static int 3711 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3712 { 3713 int ret = 0; 3714 3715 /* The vma->pages are only valid within the lifespan of the borrowed 3716 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so 3717 * must be the vma->pages. A simple rule is that vma->pages must only 3718 * be accessed when the obj->mm.pages are pinned. 3719 */ 3720 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); 3721 3722 if (vma->pages) 3723 return 0; 3724 3725 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3726 vma->pages = vma->obj->mm.pages; 3727 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3728 vma->pages = 3729 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3730 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3731 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3732 else 3733 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3734 vma->ggtt_view.type); 3735 3736 if (!vma->pages) { 3737 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3738 vma->ggtt_view.type); 3739 ret = -EINVAL; 3740 } else if (IS_ERR(vma->pages)) { 3741 ret = PTR_ERR(vma->pages); 3742 vma->pages = NULL; 3743 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3744 vma->ggtt_view.type, ret); 3745 } 3746 3747 return ret; 3748 } 3749 3750 /** 3751 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 3752 * @vma: VMA to map 3753 * @cache_level: mapping cache level 3754 * @flags: flags like global or local mapping 3755 * 3756 * DMA addresses are taken from the scatter-gather table of this object (or of 3757 * this VMA in case of non-default GGTT views) and PTE entries set up. 3758 * Note that DMA addresses are also the only part of the SG table we care about. 
3759 */ 3760 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3761 u32 flags) 3762 { 3763 u32 bind_flags; 3764 u32 vma_flags; 3765 int ret; 3766 3767 if (WARN_ON(flags == 0)) 3768 return -EINVAL; 3769 3770 bind_flags = 0; 3771 if (flags & PIN_GLOBAL) 3772 bind_flags |= I915_VMA_GLOBAL_BIND; 3773 if (flags & PIN_USER) 3774 bind_flags |= I915_VMA_LOCAL_BIND; 3775 3776 vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 3777 if (flags & PIN_UPDATE) 3778 bind_flags |= vma_flags; 3779 else 3780 bind_flags &= ~vma_flags; 3781 if (bind_flags == 0) 3782 return 0; 3783 3784 if (vma_flags == 0 && vma->vm->allocate_va_range) { 3785 trace_i915_va_alloc(vma); 3786 ret = vma->vm->allocate_va_range(vma->vm, 3787 vma->node.start, 3788 vma->node.size); 3789 if (ret) 3790 return ret; 3791 } 3792 3793 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3794 if (ret) 3795 return ret; 3796 3797 vma->flags |= bind_flags; 3798 return 0; 3799 } 3800 3801 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3802 { 3803 void __iomem *ptr; 3804 3805 /* Access through the GTT requires the device to be awake. */ 3806 assert_rpm_wakelock_held(to_i915(vma->vm->dev)); 3807 3808 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3809 if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) 3810 return IO_ERR_PTR(-ENODEV); 3811 3812 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 3813 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); 3814 3815 ptr = vma->iomap; 3816 if (ptr == NULL) { 3817 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, 3818 vma->node.start, 3819 vma->node.size); 3820 if (ptr == NULL) 3821 return IO_ERR_PTR(-ENOMEM); 3822 3823 vma->iomap = ptr; 3824 } 3825 3826 __i915_vma_pin(vma); 3827 return ptr; 3828 } 3829 3830 void i915_vma_unpin_and_release(struct i915_vma **p_vma) 3831 { 3832 struct i915_vma *vma; 3833 struct drm_i915_gem_object *obj; 3834 3835 vma = fetch_and_zero(p_vma); 3836 if (!vma) 3837 return; 3838 3839 obj = vma->obj; 3840 3841 i915_vma_unpin(vma); 3842 i915_vma_close(vma); 3843 3844 __i915_gem_object_release_unless_active(obj); 3845 } 3846
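/*
 * Purely illustrative sketch of how the VMA helpers above fit together for a
 * CPU write through the mappable aperture; this is not a caller that exists
 * in the driver. Locking, runtime pm, pinning of obj->mm.pages, allocation
 * of vma->node and error handling are all elided, and real users normally go
 * through the higher-level object pinning helpers rather than calling
 * i915_vma_bind() directly:
 *
 *	struct i915_vma *vma;
 *	void __iomem *ptr;
 *
 *	vma = i915_gem_obj_lookup_or_create_vma(obj, &dev_priv->ggtt.base,
 *						 NULL);
 *	if (i915_vma_bind(vma, obj->cache_level, PIN_GLOBAL) == 0) {
 *		ptr = i915_vma_pin_iomap(vma);
 *		if (!IS_ERR(ptr)) {
 *			memcpy_toio(ptr, src, len);
 *			i915_vma_unpin(vma);
 *		}
 *	}
 */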