/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and the layout of pages in the alternative
 * view differ from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, the
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages() function. This table is stored in the
 * vma.ggtt_view and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that a passed
 * in struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */

static inline struct i915_ggtt *
i915_vm_to_ggtt(struct i915_address_space *vm)
{
	GEM_BUG_ON(!i915_is_ggtt(vm));
	return container_of(vm, struct i915_ggtt, base);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal = {
	.type = I915_GGTT_VIEW_NORMAL,
};
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED,
};

int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
	has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
	has_full_48bit_ppgtt =
		IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;

	if (intel_vgpu_active(dev_priv)) {
		/* emulation is too hard */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = false;
	}

	if (!has_aliasing_ppgtt)
		return 0;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;

	vma->pages = vma->obj->pages;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->size,
			     true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ?
_PAGE_PRESENT | _PAGE_RW : 0; 203 pte |= addr; 204 205 switch (level) { 206 case I915_CACHE_NONE: 207 pte |= PPAT_UNCACHED_INDEX; 208 break; 209 case I915_CACHE_WT: 210 pte |= PPAT_DISPLAY_ELLC_INDEX; 211 break; 212 default: 213 pte |= PPAT_CACHED_INDEX; 214 break; 215 } 216 217 return pte; 218 } 219 220 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 221 const enum i915_cache_level level) 222 { 223 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 224 pde |= addr; 225 if (level != I915_CACHE_NONE) 226 pde |= PPAT_CACHED_PDE_INDEX; 227 else 228 pde |= PPAT_UNCACHED_INDEX; 229 return pde; 230 } 231 232 #define gen8_pdpe_encode gen8_pde_encode 233 #define gen8_pml4e_encode gen8_pde_encode 234 235 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 236 enum i915_cache_level level, 237 bool valid, u32 unused) 238 { 239 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 240 pte |= GEN6_PTE_ADDR_ENCODE(addr); 241 242 switch (level) { 243 case I915_CACHE_L3_LLC: 244 case I915_CACHE_LLC: 245 pte |= GEN6_PTE_CACHE_LLC; 246 break; 247 case I915_CACHE_NONE: 248 pte |= GEN6_PTE_UNCACHED; 249 break; 250 default: 251 MISSING_CASE(level); 252 } 253 254 return pte; 255 } 256 257 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 258 enum i915_cache_level level, 259 bool valid, u32 unused) 260 { 261 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 262 pte |= GEN6_PTE_ADDR_ENCODE(addr); 263 264 switch (level) { 265 case I915_CACHE_L3_LLC: 266 pte |= GEN7_PTE_CACHE_L3_LLC; 267 break; 268 case I915_CACHE_LLC: 269 pte |= GEN6_PTE_CACHE_LLC; 270 break; 271 case I915_CACHE_NONE: 272 pte |= GEN6_PTE_UNCACHED; 273 break; 274 default: 275 MISSING_CASE(level); 276 } 277 278 return pte; 279 } 280 281 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 282 enum i915_cache_level level, 283 bool valid, u32 flags) 284 { 285 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 286 pte |= GEN6_PTE_ADDR_ENCODE(addr); 287 288 if (!(flags & PTE_READ_ONLY)) 289 pte |= BYT_PTE_WRITEABLE; 290 291 if (level != I915_CACHE_NONE) 292 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 293 294 return pte; 295 } 296 297 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 298 enum i915_cache_level level, 299 bool valid, u32 unused) 300 { 301 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 302 pte |= HSW_PTE_ADDR_ENCODE(addr); 303 304 if (level != I915_CACHE_NONE) 305 pte |= HSW_WB_LLC_AGE3; 306 307 return pte; 308 } 309 310 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 311 enum i915_cache_level level, 312 bool valid, u32 unused) 313 { 314 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 315 pte |= HSW_PTE_ADDR_ENCODE(addr); 316 317 switch (level) { 318 case I915_CACHE_NONE: 319 break; 320 case I915_CACHE_WT: 321 pte |= HSW_WT_ELLC_LLC_AGE3; 322 break; 323 default: 324 pte |= HSW_WB_ELLC_LLC_AGE3; 325 break; 326 } 327 328 return pte; 329 } 330 331 static int __setup_page_dma(struct drm_device *dev, 332 struct i915_page_dma *p, gfp_t flags) 333 { 334 struct device *kdev = &dev->pdev->dev; 335 336 p->page = alloc_page(flags); 337 if (!p->page) 338 return -ENOMEM; 339 340 p->daddr = dma_map_page(kdev, 341 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 342 343 if (dma_mapping_error(kdev, p->daddr)) { 344 __free_page(p->page); 345 return -EINVAL; 346 } 347 348 return 0; 349 } 350 351 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 352 { 353 return __setup_page_dma(dev, p, I915_GFP_DMA); 354 } 355 356 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 357 { 358 struct pci_dev *pdev = dev->pdev; 359 360 if (WARN_ON(!p->page)) 361 return; 362 363 dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 364 __free_page(p->page); 365 memset(p, 0, sizeof(*p)); 366 } 367 368 static void *kmap_page_dma(struct i915_page_dma *p) 369 { 370 return kmap_atomic(p->page); 371 } 372 373 /* We use the flushing unmap only with ppgtt structures: 374 * page directories, page tables and scratch pages. 375 */ 376 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 377 { 378 /* There are only few exceptions for gen >=6. chv and bxt. 379 * And we are not sure about the latter so play safe for now. 380 */ 381 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 382 drm_clflush_virt_range(vaddr, PAGE_SIZE); 383 384 kunmap_atomic(vaddr); 385 } 386 387 #define kmap_px(px) kmap_page_dma(px_base(px)) 388 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 389 390 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 391 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 392 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 393 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 394 395 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 396 const uint64_t val) 397 { 398 int i; 399 uint64_t * const vaddr = kmap_page_dma(p); 400 401 for (i = 0; i < 512; i++) 402 vaddr[i] = val; 403 404 kunmap_page_dma(dev, vaddr); 405 } 406 407 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 408 const uint32_t val32) 409 { 410 uint64_t v = val32; 411 412 v = v << 32 | val32; 413 414 fill_page_dma(dev, p, v); 415 } 416 417 static int 418 setup_scratch_page(struct drm_device *dev, 419 struct i915_page_dma *scratch, 420 gfp_t gfp) 421 { 422 return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO); 423 } 424 425 static void cleanup_scratch_page(struct drm_device *dev, 426 struct i915_page_dma *scratch) 427 { 428 cleanup_page_dma(dev, scratch); 429 } 430 431 static struct i915_page_table *alloc_pt(struct drm_device *dev) 432 { 433 struct i915_page_table *pt; 434 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
435 GEN8_PTES : GEN6_PTES; 436 int ret = -ENOMEM; 437 438 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 439 if (!pt) 440 return ERR_PTR(-ENOMEM); 441 442 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 443 GFP_KERNEL); 444 445 if (!pt->used_ptes) 446 goto fail_bitmap; 447 448 ret = setup_px(dev, pt); 449 if (ret) 450 goto fail_page_m; 451 452 return pt; 453 454 fail_page_m: 455 kfree(pt->used_ptes); 456 fail_bitmap: 457 kfree(pt); 458 459 return ERR_PTR(ret); 460 } 461 462 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 463 { 464 cleanup_px(dev, pt); 465 kfree(pt->used_ptes); 466 kfree(pt); 467 } 468 469 static void gen8_initialize_pt(struct i915_address_space *vm, 470 struct i915_page_table *pt) 471 { 472 gen8_pte_t scratch_pte; 473 474 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 475 I915_CACHE_LLC, true); 476 477 fill_px(vm->dev, pt, scratch_pte); 478 } 479 480 static void gen6_initialize_pt(struct i915_address_space *vm, 481 struct i915_page_table *pt) 482 { 483 gen6_pte_t scratch_pte; 484 485 WARN_ON(vm->scratch_page.daddr == 0); 486 487 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 488 I915_CACHE_LLC, true, 0); 489 490 fill32_px(vm->dev, pt, scratch_pte); 491 } 492 493 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 494 { 495 struct i915_page_directory *pd; 496 int ret = -ENOMEM; 497 498 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 499 if (!pd) 500 return ERR_PTR(-ENOMEM); 501 502 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 503 sizeof(*pd->used_pdes), GFP_KERNEL); 504 if (!pd->used_pdes) 505 goto fail_bitmap; 506 507 ret = setup_px(dev, pd); 508 if (ret) 509 goto fail_page_m; 510 511 return pd; 512 513 fail_page_m: 514 kfree(pd->used_pdes); 515 fail_bitmap: 516 kfree(pd); 517 518 return ERR_PTR(ret); 519 } 520 521 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 522 { 523 if (px_page(pd)) { 524 cleanup_px(dev, pd); 525 kfree(pd->used_pdes); 526 kfree(pd); 527 } 528 } 529 530 static void gen8_initialize_pd(struct i915_address_space *vm, 531 struct i915_page_directory *pd) 532 { 533 gen8_pde_t scratch_pde; 534 535 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 536 537 fill_px(vm->dev, pd, scratch_pde); 538 } 539 540 static int __pdp_init(struct drm_device *dev, 541 struct i915_page_directory_pointer *pdp) 542 { 543 size_t pdpes = I915_PDPES_PER_PDP(dev); 544 545 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 546 sizeof(unsigned long), 547 GFP_KERNEL); 548 if (!pdp->used_pdpes) 549 return -ENOMEM; 550 551 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 552 GFP_KERNEL); 553 if (!pdp->page_directory) { 554 kfree(pdp->used_pdpes); 555 /* the PDP might be the statically allocated top level. 
Keep it 556 * as clean as possible */ 557 pdp->used_pdpes = NULL; 558 return -ENOMEM; 559 } 560 561 return 0; 562 } 563 564 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 565 { 566 kfree(pdp->used_pdpes); 567 kfree(pdp->page_directory); 568 pdp->page_directory = NULL; 569 } 570 571 static struct 572 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 573 { 574 struct i915_page_directory_pointer *pdp; 575 int ret = -ENOMEM; 576 577 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 578 579 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 580 if (!pdp) 581 return ERR_PTR(-ENOMEM); 582 583 ret = __pdp_init(dev, pdp); 584 if (ret) 585 goto fail_bitmap; 586 587 ret = setup_px(dev, pdp); 588 if (ret) 589 goto fail_page_m; 590 591 return pdp; 592 593 fail_page_m: 594 __pdp_fini(pdp); 595 fail_bitmap: 596 kfree(pdp); 597 598 return ERR_PTR(ret); 599 } 600 601 static void free_pdp(struct drm_device *dev, 602 struct i915_page_directory_pointer *pdp) 603 { 604 __pdp_fini(pdp); 605 if (USES_FULL_48BIT_PPGTT(dev)) { 606 cleanup_px(dev, pdp); 607 kfree(pdp); 608 } 609 } 610 611 static void gen8_initialize_pdp(struct i915_address_space *vm, 612 struct i915_page_directory_pointer *pdp) 613 { 614 gen8_ppgtt_pdpe_t scratch_pdpe; 615 616 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 617 618 fill_px(vm->dev, pdp, scratch_pdpe); 619 } 620 621 static void gen8_initialize_pml4(struct i915_address_space *vm, 622 struct i915_pml4 *pml4) 623 { 624 gen8_ppgtt_pml4e_t scratch_pml4e; 625 626 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 627 I915_CACHE_LLC); 628 629 fill_px(vm->dev, pml4, scratch_pml4e); 630 } 631 632 static void 633 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 634 struct i915_page_directory_pointer *pdp, 635 struct i915_page_directory *pd, 636 int index) 637 { 638 gen8_ppgtt_pdpe_t *page_directorypo; 639 640 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 641 return; 642 643 page_directorypo = kmap_px(pdp); 644 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 645 kunmap_px(ppgtt, page_directorypo); 646 } 647 648 static void 649 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 650 struct i915_pml4 *pml4, 651 struct i915_page_directory_pointer *pdp, 652 int index) 653 { 654 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 655 656 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 657 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 658 kunmap_px(ppgtt, pagemap); 659 } 660 661 /* Broadwell Page Directory Pointer Descriptors */ 662 static int gen8_write_pdp(struct drm_i915_gem_request *req, 663 unsigned entry, 664 dma_addr_t addr) 665 { 666 struct intel_ring *ring = req->ring; 667 struct intel_engine_cs *engine = req->engine; 668 int ret; 669 670 BUG_ON(entry >= 4); 671 672 ret = intel_ring_begin(req, 6); 673 if (ret) 674 return ret; 675 676 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 677 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); 678 intel_ring_emit(ring, upper_32_bits(addr)); 679 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 680 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); 681 intel_ring_emit(ring, lower_32_bits(addr)); 682 intel_ring_advance(ring); 683 684 return 0; 685 } 686 687 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 688 struct drm_i915_gem_request *req) 689 { 690 int i, ret; 691 692 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 693 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 694 695 ret = gen8_write_pdp(req, i, 
pd_daddr); 696 if (ret) 697 return ret; 698 } 699 700 return 0; 701 } 702 703 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 704 struct drm_i915_gem_request *req) 705 { 706 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 707 } 708 709 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 710 struct i915_page_directory_pointer *pdp, 711 uint64_t start, 712 uint64_t length, 713 gen8_pte_t scratch_pte) 714 { 715 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 716 gen8_pte_t *pt_vaddr; 717 unsigned pdpe = gen8_pdpe_index(start); 718 unsigned pde = gen8_pde_index(start); 719 unsigned pte = gen8_pte_index(start); 720 unsigned num_entries = length >> PAGE_SHIFT; 721 unsigned last_pte, i; 722 723 if (WARN_ON(!pdp)) 724 return; 725 726 while (num_entries) { 727 struct i915_page_directory *pd; 728 struct i915_page_table *pt; 729 730 if (WARN_ON(!pdp->page_directory[pdpe])) 731 break; 732 733 pd = pdp->page_directory[pdpe]; 734 735 if (WARN_ON(!pd->page_table[pde])) 736 break; 737 738 pt = pd->page_table[pde]; 739 740 if (WARN_ON(!px_page(pt))) 741 break; 742 743 last_pte = pte + num_entries; 744 if (last_pte > GEN8_PTES) 745 last_pte = GEN8_PTES; 746 747 pt_vaddr = kmap_px(pt); 748 749 for (i = pte; i < last_pte; i++) { 750 pt_vaddr[i] = scratch_pte; 751 num_entries--; 752 } 753 754 kunmap_px(ppgtt, pt_vaddr); 755 756 pte = 0; 757 if (++pde == I915_PDES) { 758 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 759 break; 760 pde = 0; 761 } 762 } 763 } 764 765 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 766 uint64_t start, 767 uint64_t length, 768 bool use_scratch) 769 { 770 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 771 gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 772 I915_CACHE_LLC, use_scratch); 773 774 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 775 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 776 scratch_pte); 777 } else { 778 uint64_t pml4e; 779 struct i915_page_directory_pointer *pdp; 780 781 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 782 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 783 scratch_pte); 784 } 785 } 786 } 787 788 static void 789 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 790 struct i915_page_directory_pointer *pdp, 791 struct sg_page_iter *sg_iter, 792 uint64_t start, 793 enum i915_cache_level cache_level) 794 { 795 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 796 gen8_pte_t *pt_vaddr; 797 unsigned pdpe = gen8_pdpe_index(start); 798 unsigned pde = gen8_pde_index(start); 799 unsigned pte = gen8_pte_index(start); 800 801 pt_vaddr = NULL; 802 803 while (__sg_page_iter_next(sg_iter)) { 804 if (pt_vaddr == NULL) { 805 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 806 struct i915_page_table *pt = pd->page_table[pde]; 807 pt_vaddr = kmap_px(pt); 808 } 809 810 pt_vaddr[pte] = 811 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 812 cache_level, true); 813 if (++pte == GEN8_PTES) { 814 kunmap_px(ppgtt, pt_vaddr); 815 pt_vaddr = NULL; 816 if (++pde == I915_PDES) { 817 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 818 break; 819 pde = 0; 820 } 821 pte = 0; 822 } 823 } 824 825 if (pt_vaddr) 826 kunmap_px(ppgtt, pt_vaddr); 827 } 828 829 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 830 struct sg_table *pages, 831 uint64_t start, 832 enum i915_cache_level cache_level, 833 u32 unused) 834 { 835 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 836 struct sg_page_iter sg_iter; 837 838 __sg_page_iter_start(&sg_iter, 
pages->sgl, sg_nents(pages->sgl), 0); 839 840 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 841 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 842 cache_level); 843 } else { 844 struct i915_page_directory_pointer *pdp; 845 uint64_t pml4e; 846 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 847 848 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 849 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 850 start, cache_level); 851 } 852 } 853 } 854 855 static void gen8_free_page_tables(struct drm_device *dev, 856 struct i915_page_directory *pd) 857 { 858 int i; 859 860 if (!px_page(pd)) 861 return; 862 863 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 864 if (WARN_ON(!pd->page_table[i])) 865 continue; 866 867 free_pt(dev, pd->page_table[i]); 868 pd->page_table[i] = NULL; 869 } 870 } 871 872 static int gen8_init_scratch(struct i915_address_space *vm) 873 { 874 struct drm_device *dev = vm->dev; 875 int ret; 876 877 ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA); 878 if (ret) 879 return ret; 880 881 vm->scratch_pt = alloc_pt(dev); 882 if (IS_ERR(vm->scratch_pt)) { 883 ret = PTR_ERR(vm->scratch_pt); 884 goto free_scratch_page; 885 } 886 887 vm->scratch_pd = alloc_pd(dev); 888 if (IS_ERR(vm->scratch_pd)) { 889 ret = PTR_ERR(vm->scratch_pd); 890 goto free_pt; 891 } 892 893 if (USES_FULL_48BIT_PPGTT(dev)) { 894 vm->scratch_pdp = alloc_pdp(dev); 895 if (IS_ERR(vm->scratch_pdp)) { 896 ret = PTR_ERR(vm->scratch_pdp); 897 goto free_pd; 898 } 899 } 900 901 gen8_initialize_pt(vm, vm->scratch_pt); 902 gen8_initialize_pd(vm, vm->scratch_pd); 903 if (USES_FULL_48BIT_PPGTT(dev)) 904 gen8_initialize_pdp(vm, vm->scratch_pdp); 905 906 return 0; 907 908 free_pd: 909 free_pd(dev, vm->scratch_pd); 910 free_pt: 911 free_pt(dev, vm->scratch_pt); 912 free_scratch_page: 913 cleanup_scratch_page(dev, &vm->scratch_page); 914 915 return ret; 916 } 917 918 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 919 { 920 enum vgt_g2v_type msg; 921 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 922 int i; 923 924 if (USES_FULL_48BIT_PPGTT(dev_priv)) { 925 u64 daddr = px_dma(&ppgtt->pml4); 926 927 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 928 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 929 930 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 931 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 932 } else { 933 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 934 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 935 936 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 937 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 938 } 939 940 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 941 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 942 } 943 944 I915_WRITE(vgtif_reg(g2v_notify), msg); 945 946 return 0; 947 } 948 949 static void gen8_free_scratch(struct i915_address_space *vm) 950 { 951 struct drm_device *dev = vm->dev; 952 953 if (USES_FULL_48BIT_PPGTT(dev)) 954 free_pdp(dev, vm->scratch_pdp); 955 free_pd(dev, vm->scratch_pd); 956 free_pt(dev, vm->scratch_pt); 957 cleanup_scratch_page(dev, &vm->scratch_page); 958 } 959 960 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 961 struct i915_page_directory_pointer *pdp) 962 { 963 int i; 964 965 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 966 if (WARN_ON(!pdp->page_directory[i])) 967 continue; 968 969 gen8_free_page_tables(dev, pdp->page_directory[i]); 970 free_pd(dev, pdp->page_directory[i]); 971 } 972 973 free_pdp(dev, pdp); 974 } 975 976 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 977 { 978 int i; 979 980 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 981 if (WARN_ON(!ppgtt->pml4.pdps[i])) 982 continue; 983 984 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 985 } 986 987 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 988 } 989 990 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 991 { 992 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 993 994 if (intel_vgpu_active(to_i915(vm->dev))) 995 gen8_ppgtt_notify_vgt(ppgtt, false); 996 997 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 998 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 999 else 1000 gen8_ppgtt_cleanup_4lvl(ppgtt); 1001 1002 gen8_free_scratch(vm); 1003 } 1004 1005 /** 1006 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 1007 * @vm: Master vm structure. 1008 * @pd: Page directory for this address range. 1009 * @start: Starting virtual address to begin allocations. 1010 * @length: Size of the allocations. 1011 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1012 * caller to free on error. 1013 * 1014 * Allocate the required number of page tables. Extremely similar to 1015 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1016 * the page directory boundary (instead of the page directory pointer). That 1017 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1018 * possible, and likely that the caller will need to use multiple calls of this 1019 * function to achieve the appropriate allocation. 1020 * 1021 * Return: 0 if success; negative error code otherwise. 
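 *
 * (With 4K pages each page table maps 512 * 4K = 2MB, so a full page
 * directory of 512 such tables covers the 1GB boundary mentioned above.)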
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		/* Don't reallocate page tables */
		if (test_bit(pde, pd->used_pdes)) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == vm->scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(vm, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @vm: Master vm structure.
 * @pdp: Page directory pointer for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pds: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directories starting at the pde index of
 * @start, and ending at the pde index @start + @length. This function will skip
 * over already allocated page directories within the range, and only allocate
 * new ones, setting the appropriate pointer within the pdp as well as the
 * correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
				  struct i915_page_directory_pointer *pdp,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pds)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);

	WARN_ON(!bitmap_empty(new_pds, pdpes));

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (test_bit(pdpe, pdp->used_pdpes))
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(vm, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm: Master vm structure.
 * @pml4: Page map level 4 for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
1128 * 1129 * Allocate the required number of page directory pointers. Extremely similar to 1130 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1131 * The main difference is here we are limited by the pml4 boundary (instead of 1132 * the page directory pointer). 1133 * 1134 * Return: 0 if success; negative error code otherwise. 1135 */ 1136 static int 1137 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1138 struct i915_pml4 *pml4, 1139 uint64_t start, 1140 uint64_t length, 1141 unsigned long *new_pdps) 1142 { 1143 struct drm_device *dev = vm->dev; 1144 struct i915_page_directory_pointer *pdp; 1145 uint32_t pml4e; 1146 1147 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1148 1149 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1150 if (!test_bit(pml4e, pml4->used_pml4es)) { 1151 pdp = alloc_pdp(dev); 1152 if (IS_ERR(pdp)) 1153 goto unwind_out; 1154 1155 gen8_initialize_pdp(vm, pdp); 1156 pml4->pdps[pml4e] = pdp; 1157 __set_bit(pml4e, new_pdps); 1158 trace_i915_page_directory_pointer_entry_alloc(vm, 1159 pml4e, 1160 start, 1161 GEN8_PML4E_SHIFT); 1162 } 1163 } 1164 1165 return 0; 1166 1167 unwind_out: 1168 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1169 free_pdp(dev, pml4->pdps[pml4e]); 1170 1171 return -ENOMEM; 1172 } 1173 1174 static void 1175 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1176 { 1177 kfree(new_pts); 1178 kfree(new_pds); 1179 } 1180 1181 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1182 * of these are based on the number of PDPEs in the system. 1183 */ 1184 static 1185 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1186 unsigned long **new_pts, 1187 uint32_t pdpes) 1188 { 1189 unsigned long *pds; 1190 unsigned long *pts; 1191 1192 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1193 if (!pds) 1194 return -ENOMEM; 1195 1196 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1197 GFP_TEMPORARY); 1198 if (!pts) 1199 goto err_out; 1200 1201 *new_pds = pds; 1202 *new_pts = pts; 1203 1204 return 0; 1205 1206 err_out: 1207 free_gen8_temp_bitmaps(pds, pts); 1208 return -ENOMEM; 1209 } 1210 1211 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1212 * the page table structures, we mark them dirty so that 1213 * context switching/execlist queuing code takes extra steps 1214 * to ensure that tlbs are flushed. 1215 */ 1216 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1217 { 1218 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1219 } 1220 1221 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1222 struct i915_page_directory_pointer *pdp, 1223 uint64_t start, 1224 uint64_t length) 1225 { 1226 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1227 unsigned long *new_page_dirs, *new_page_tables; 1228 struct drm_device *dev = vm->dev; 1229 struct i915_page_directory *pd; 1230 const uint64_t orig_start = start; 1231 const uint64_t orig_length = length; 1232 uint32_t pdpe; 1233 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1234 int ret; 1235 1236 /* Wrap is never okay since we can only represent 48b, and we don't 1237 * actually use the other side of the canonical address space. 
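 * Both checks below therefore just WARN and bail out with -ENODEV instead of
 * trying to clamp the range.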
1238 */ 1239 if (WARN_ON(start + length < start)) 1240 return -ENODEV; 1241 1242 if (WARN_ON(start + length > vm->total)) 1243 return -ENODEV; 1244 1245 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1246 if (ret) 1247 return ret; 1248 1249 /* Do the allocations first so we can easily bail out */ 1250 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1251 new_page_dirs); 1252 if (ret) { 1253 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1254 return ret; 1255 } 1256 1257 /* For every page directory referenced, allocate page tables */ 1258 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1259 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1260 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1261 if (ret) 1262 goto err_out; 1263 } 1264 1265 start = orig_start; 1266 length = orig_length; 1267 1268 /* Allocations have completed successfully, so set the bitmaps, and do 1269 * the mappings. */ 1270 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1271 gen8_pde_t *const page_directory = kmap_px(pd); 1272 struct i915_page_table *pt; 1273 uint64_t pd_len = length; 1274 uint64_t pd_start = start; 1275 uint32_t pde; 1276 1277 /* Every pd should be allocated, we just did that above. */ 1278 WARN_ON(!pd); 1279 1280 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1281 /* Same reasoning as pd */ 1282 WARN_ON(!pt); 1283 WARN_ON(!pd_len); 1284 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1285 1286 /* Set our used ptes within the page table */ 1287 bitmap_set(pt->used_ptes, 1288 gen8_pte_index(pd_start), 1289 gen8_pte_count(pd_start, pd_len)); 1290 1291 /* Our pde is now pointing to the pagetable, pt */ 1292 __set_bit(pde, pd->used_pdes); 1293 1294 /* Map the PDE to the page table */ 1295 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1296 I915_CACHE_LLC); 1297 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1298 gen8_pte_index(start), 1299 gen8_pte_count(start, length), 1300 GEN8_PTES); 1301 1302 /* NB: We haven't yet mapped ptes to pages. At this 1303 * point we're still relying on insert_entries() */ 1304 } 1305 1306 kunmap_px(ppgtt, page_directory); 1307 __set_bit(pdpe, pdp->used_pdpes); 1308 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1309 } 1310 1311 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1312 mark_tlbs_dirty(ppgtt); 1313 return 0; 1314 1315 err_out: 1316 while (pdpe--) { 1317 unsigned long temp; 1318 1319 for_each_set_bit(temp, new_page_tables + pdpe * 1320 BITS_TO_LONGS(I915_PDES), I915_PDES) 1321 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1322 } 1323 1324 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1325 free_pd(dev, pdp->page_directory[pdpe]); 1326 1327 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1328 mark_tlbs_dirty(ppgtt); 1329 return ret; 1330 } 1331 1332 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1333 struct i915_pml4 *pml4, 1334 uint64_t start, 1335 uint64_t length) 1336 { 1337 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1338 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1339 struct i915_page_directory_pointer *pdp; 1340 uint64_t pml4e; 1341 int ret = 0; 1342 1343 /* Do the pml4 allocations first, so we don't need to track the newly 1344 * allocated tables below the pdp */ 1345 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1346 1347 /* The pagedirectory and pagetable allocations are done in the shared 3 1348 * and 4 level code. Just allocate the pdps. 
1349 */ 1350 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1351 new_pdps); 1352 if (ret) 1353 return ret; 1354 1355 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1356 "The allocation has spanned more than 512GB. " 1357 "It is highly likely this is incorrect."); 1358 1359 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1360 WARN_ON(!pdp); 1361 1362 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1363 if (ret) 1364 goto err_out; 1365 1366 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1367 } 1368 1369 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1370 GEN8_PML4ES_PER_PML4); 1371 1372 return 0; 1373 1374 err_out: 1375 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1376 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1377 1378 return ret; 1379 } 1380 1381 static int gen8_alloc_va_range(struct i915_address_space *vm, 1382 uint64_t start, uint64_t length) 1383 { 1384 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1385 1386 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1387 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1388 else 1389 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1390 } 1391 1392 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1393 uint64_t start, uint64_t length, 1394 gen8_pte_t scratch_pte, 1395 struct seq_file *m) 1396 { 1397 struct i915_page_directory *pd; 1398 uint32_t pdpe; 1399 1400 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1401 struct i915_page_table *pt; 1402 uint64_t pd_len = length; 1403 uint64_t pd_start = start; 1404 uint32_t pde; 1405 1406 if (!test_bit(pdpe, pdp->used_pdpes)) 1407 continue; 1408 1409 seq_printf(m, "\tPDPE #%d\n", pdpe); 1410 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1411 uint32_t pte; 1412 gen8_pte_t *pt_vaddr; 1413 1414 if (!test_bit(pde, pd->used_pdes)) 1415 continue; 1416 1417 pt_vaddr = kmap_px(pt); 1418 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1419 uint64_t va = 1420 (pdpe << GEN8_PDPE_SHIFT) | 1421 (pde << GEN8_PDE_SHIFT) | 1422 (pte << GEN8_PTE_SHIFT); 1423 int i; 1424 bool found = false; 1425 1426 for (i = 0; i < 4; i++) 1427 if (pt_vaddr[pte + i] != scratch_pte) 1428 found = true; 1429 if (!found) 1430 continue; 1431 1432 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1433 for (i = 0; i < 4; i++) { 1434 if (pt_vaddr[pte + i] != scratch_pte) 1435 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1436 else 1437 seq_puts(m, " SCRATCH "); 1438 } 1439 seq_puts(m, "\n"); 1440 } 1441 /* don't use kunmap_px, it could trigger 1442 * an unnecessary flush. 
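 * (kunmap_px() goes through kunmap_page_dma(), which clflushes the page on
 * chv/bxt; a plain kunmap_atomic() is enough for this read-only dump.)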
 */
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	uint64_t start = ppgtt->base.start;
	uint64_t length = ppgtt->base.total;
	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
						 I915_CACHE_LLC, true);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
	} else {
		uint64_t pml4e;
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
			if (!test_bit(pml4e, pml4->used_pml4es))
				continue;

			seq_printf(m, " PML4E #%llu\n", pml4e);
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
		}
	}
}

static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{
	unsigned long *new_page_dirs, *new_page_tables;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
	int ret;

	/* We allocate temp bitmap for page tables for no gain
	 * but as this is for init only, let's keep things simple
	 */
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Allocate for all pdps regardless of how the ppgtt
	 * was defined.
	 */
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
						0, 1ULL << 32,
						new_page_dirs);
	if (!ret)
		*ppgtt->pdp.used_pdpes = *new_page_dirs;

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);

	return ret;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
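 *
 * In full 48b mode there is instead a PML4 with 512 entries, each pointing to
 * a pdp of 512 such 1GB entries, for the 1ULL << 48 (256TB) of address space
 * set up in gen8_ppgtt_init().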
1506 * 1507 */ 1508 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1509 { 1510 int ret; 1511 1512 ret = gen8_init_scratch(&ppgtt->base); 1513 if (ret) 1514 return ret; 1515 1516 ppgtt->base.start = 0; 1517 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1518 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1519 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1520 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1521 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1522 ppgtt->base.bind_vma = ppgtt_bind_vma; 1523 ppgtt->debug_dump = gen8_dump_ppgtt; 1524 1525 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1526 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1527 if (ret) 1528 goto free_scratch; 1529 1530 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1531 1532 ppgtt->base.total = 1ULL << 48; 1533 ppgtt->switch_mm = gen8_48b_mm_switch; 1534 } else { 1535 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1536 if (ret) 1537 goto free_scratch; 1538 1539 ppgtt->base.total = 1ULL << 32; 1540 ppgtt->switch_mm = gen8_legacy_mm_switch; 1541 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1542 0, 0, 1543 GEN8_PML4E_SHIFT); 1544 1545 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) { 1546 ret = gen8_preallocate_top_level_pdps(ppgtt); 1547 if (ret) 1548 goto free_scratch; 1549 } 1550 } 1551 1552 if (intel_vgpu_active(to_i915(ppgtt->base.dev))) 1553 gen8_ppgtt_notify_vgt(ppgtt, true); 1554 1555 return 0; 1556 1557 free_scratch: 1558 gen8_free_scratch(&ppgtt->base); 1559 return ret; 1560 } 1561 1562 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1563 { 1564 struct i915_address_space *vm = &ppgtt->base; 1565 struct i915_page_table *unused; 1566 gen6_pte_t scratch_pte; 1567 uint32_t pd_entry; 1568 uint32_t pte, pde; 1569 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1570 1571 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1572 I915_CACHE_LLC, true, 0); 1573 1574 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) { 1575 u32 expected; 1576 gen6_pte_t *pt_vaddr; 1577 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1578 pd_entry = readl(ppgtt->pd_addr + pde); 1579 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1580 1581 if (pd_entry != expected) 1582 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1583 pde, 1584 pd_entry, 1585 expected); 1586 seq_printf(m, "\tPDE: %x\n", pd_entry); 1587 1588 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1589 1590 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1591 unsigned long va = 1592 (pde * PAGE_SIZE * GEN6_PTES) + 1593 (pte * PAGE_SIZE); 1594 int i; 1595 bool found = false; 1596 for (i = 0; i < 4; i++) 1597 if (pt_vaddr[pte + i] != scratch_pte) 1598 found = true; 1599 if (!found) 1600 continue; 1601 1602 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1603 for (i = 0; i < 4; i++) { 1604 if (pt_vaddr[pte + i] != scratch_pte) 1605 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1606 else 1607 seq_puts(m, " SCRATCH "); 1608 } 1609 seq_puts(m, "\n"); 1610 } 1611 kunmap_px(ppgtt, pt_vaddr); 1612 } 1613 } 1614 1615 /* Write pde (index) from the page directory @pd to the page table @pt */ 1616 static void gen6_write_pde(struct i915_page_directory *pd, 1617 const int pde, struct i915_page_table *pt) 1618 { 1619 /* Caller needs to make sure the write completes if necessary */ 1620 struct i915_hw_ppgtt *ppgtt = 1621 container_of(pd, struct i915_hw_ppgtt, pd); 1622 u32 pd_entry; 1623 1624 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1625 pd_entry |= 
GEN6_PDE_VALID; 1626 1627 writel(pd_entry, ppgtt->pd_addr + pde); 1628 } 1629 1630 /* Write all the page tables found in the ppgtt structure to incrementing page 1631 * directories. */ 1632 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1633 struct i915_page_directory *pd, 1634 uint32_t start, uint32_t length) 1635 { 1636 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1637 struct i915_page_table *pt; 1638 uint32_t pde; 1639 1640 gen6_for_each_pde(pt, pd, start, length, pde) 1641 gen6_write_pde(pd, pde, pt); 1642 1643 /* Make sure write is complete before other code can use this page 1644 * table. Also require for WC mapped PTEs */ 1645 readl(ggtt->gsm); 1646 } 1647 1648 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1649 { 1650 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1651 1652 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1653 } 1654 1655 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1656 struct drm_i915_gem_request *req) 1657 { 1658 struct intel_ring *ring = req->ring; 1659 struct intel_engine_cs *engine = req->engine; 1660 int ret; 1661 1662 /* NB: TLBs must be flushed and invalidated before a switch */ 1663 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1664 if (ret) 1665 return ret; 1666 1667 ret = intel_ring_begin(req, 6); 1668 if (ret) 1669 return ret; 1670 1671 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1672 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1673 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1674 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1675 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1676 intel_ring_emit(ring, MI_NOOP); 1677 intel_ring_advance(ring); 1678 1679 return 0; 1680 } 1681 1682 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1683 struct drm_i915_gem_request *req) 1684 { 1685 struct intel_ring *ring = req->ring; 1686 struct intel_engine_cs *engine = req->engine; 1687 int ret; 1688 1689 /* NB: TLBs must be flushed and invalidated before a switch */ 1690 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1691 if (ret) 1692 return ret; 1693 1694 ret = intel_ring_begin(req, 6); 1695 if (ret) 1696 return ret; 1697 1698 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1699 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); 1700 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1701 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); 1702 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1703 intel_ring_emit(ring, MI_NOOP); 1704 intel_ring_advance(ring); 1705 1706 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 1707 if (engine->id != RCS) { 1708 ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); 1709 if (ret) 1710 return ret; 1711 } 1712 1713 return 0; 1714 } 1715 1716 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1717 struct drm_i915_gem_request *req) 1718 { 1719 struct intel_engine_cs *engine = req->engine; 1720 struct drm_i915_private *dev_priv = req->i915; 1721 1722 I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); 1723 I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); 1724 return 0; 1725 } 1726 1727 static void gen8_ppgtt_enable(struct drm_device *dev) 1728 { 1729 struct drm_i915_private *dev_priv = to_i915(dev); 1730 struct intel_engine_cs *engine; 1731 1732 for_each_engine(engine, dev_priv) { 1733 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? 
GEN8_GFX_PPGTT_48B : 0; 1734 I915_WRITE(RING_MODE_GEN7(engine), 1735 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1736 } 1737 } 1738 1739 static void gen7_ppgtt_enable(struct drm_device *dev) 1740 { 1741 struct drm_i915_private *dev_priv = to_i915(dev); 1742 struct intel_engine_cs *engine; 1743 uint32_t ecochk, ecobits; 1744 1745 ecobits = I915_READ(GAC_ECO_BITS); 1746 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1747 1748 ecochk = I915_READ(GAM_ECOCHK); 1749 if (IS_HASWELL(dev)) { 1750 ecochk |= ECOCHK_PPGTT_WB_HSW; 1751 } else { 1752 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1753 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1754 } 1755 I915_WRITE(GAM_ECOCHK, ecochk); 1756 1757 for_each_engine(engine, dev_priv) { 1758 /* GFX_MODE is per-ring on gen7+ */ 1759 I915_WRITE(RING_MODE_GEN7(engine), 1760 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1761 } 1762 } 1763 1764 static void gen6_ppgtt_enable(struct drm_device *dev) 1765 { 1766 struct drm_i915_private *dev_priv = to_i915(dev); 1767 uint32_t ecochk, gab_ctl, ecobits; 1768 1769 ecobits = I915_READ(GAC_ECO_BITS); 1770 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1771 ECOBITS_PPGTT_CACHE64B); 1772 1773 gab_ctl = I915_READ(GAB_CTL); 1774 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1775 1776 ecochk = I915_READ(GAM_ECOCHK); 1777 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1778 1779 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1780 } 1781 1782 /* PPGTT support for Sandybdrige/Gen6 and later */ 1783 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1784 uint64_t start, 1785 uint64_t length, 1786 bool use_scratch) 1787 { 1788 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1789 gen6_pte_t *pt_vaddr, scratch_pte; 1790 unsigned first_entry = start >> PAGE_SHIFT; 1791 unsigned num_entries = length >> PAGE_SHIFT; 1792 unsigned act_pt = first_entry / GEN6_PTES; 1793 unsigned first_pte = first_entry % GEN6_PTES; 1794 unsigned last_pte, i; 1795 1796 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 1797 I915_CACHE_LLC, true, 0); 1798 1799 while (num_entries) { 1800 last_pte = first_pte + num_entries; 1801 if (last_pte > GEN6_PTES) 1802 last_pte = GEN6_PTES; 1803 1804 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1805 1806 for (i = first_pte; i < last_pte; i++) 1807 pt_vaddr[i] = scratch_pte; 1808 1809 kunmap_px(ppgtt, pt_vaddr); 1810 1811 num_entries -= last_pte - first_pte; 1812 first_pte = 0; 1813 act_pt++; 1814 } 1815 } 1816 1817 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1818 struct sg_table *pages, 1819 uint64_t start, 1820 enum i915_cache_level cache_level, u32 flags) 1821 { 1822 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1823 unsigned first_entry = start >> PAGE_SHIFT; 1824 unsigned act_pt = first_entry / GEN6_PTES; 1825 unsigned act_pte = first_entry % GEN6_PTES; 1826 gen6_pte_t *pt_vaddr = NULL; 1827 struct sgt_iter sgt_iter; 1828 dma_addr_t addr; 1829 1830 for_each_sgt_dma(addr, sgt_iter, pages) { 1831 if (pt_vaddr == NULL) 1832 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1833 1834 pt_vaddr[act_pte] = 1835 vm->pte_encode(addr, cache_level, true, flags); 1836 1837 if (++act_pte == GEN6_PTES) { 1838 kunmap_px(ppgtt, pt_vaddr); 1839 pt_vaddr = NULL; 1840 act_pt++; 1841 act_pte = 0; 1842 } 1843 } 1844 1845 if (pt_vaddr) 1846 kunmap_px(ppgtt, pt_vaddr); 1847 } 1848 1849 static int gen6_alloc_va_range(struct i915_address_space *vm, 1850 uint64_t start_in, uint64_t length_in) 1851 { 1852 DECLARE_BITMAP(new_page_tables, 
I915_PDES); 1853 struct drm_device *dev = vm->dev; 1854 struct drm_i915_private *dev_priv = to_i915(dev); 1855 struct i915_ggtt *ggtt = &dev_priv->ggtt; 1856 struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 1857 struct i915_page_table *pt; 1858 uint32_t start, length, start_save, length_save; 1859 uint32_t pde; 1860 int ret; 1861 1862 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1863 return -ENODEV; 1864 1865 start = start_save = start_in; 1866 length = length_save = length_in; 1867 1868 bitmap_zero(new_page_tables, I915_PDES); 1869 1870 /* The allocation is done in two stages so that we can bail out with 1871 * minimal amount of pain. The first stage finds new page tables that 1872 * need allocation. The second stage marks use ptes within the page 1873 * tables. 1874 */ 1875 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1876 if (pt != vm->scratch_pt) { 1877 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1878 continue; 1879 } 1880 1881 /* We've already allocated a page table */ 1882 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1883 1884 pt = alloc_pt(dev); 1885 if (IS_ERR(pt)) { 1886 ret = PTR_ERR(pt); 1887 goto unwind_out; 1888 } 1889 1890 gen6_initialize_pt(vm, pt); 1891 1892 ppgtt->pd.page_table[pde] = pt; 1893 __set_bit(pde, new_page_tables); 1894 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1895 } 1896 1897 start = start_save; 1898 length = length_save; 1899 1900 gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { 1901 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1902 1903 bitmap_zero(tmp_bitmap, GEN6_PTES); 1904 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1905 gen6_pte_count(start, length)); 1906 1907 if (__test_and_clear_bit(pde, new_page_tables)) 1908 gen6_write_pde(&ppgtt->pd, pde, pt); 1909 1910 trace_i915_page_table_entry_map(vm, pde, pt, 1911 gen6_pte_index(start), 1912 gen6_pte_count(start, length), 1913 GEN6_PTES); 1914 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1915 GEN6_PTES); 1916 } 1917 1918 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1919 1920 /* Make sure write is complete before other code can use this page 1921 * table. 
Also required for WC mapped PTEs */
	readl(ggtt->gsm);

	mark_tlbs_dirty(ppgtt);
	return 0;

unwind_out:
	for_each_set_bit(pde, new_page_tables, I915_PDES) {
		struct i915_page_table *pt = ppgtt->pd.page_table[pde];

		ppgtt->pd.page_table[pde] = vm->scratch_pt;
		free_pt(vm->dev, pt);
	}

	mark_tlbs_dirty(ppgtt);
	return ret;
}

static int gen6_init_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;
	int ret;

	ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
	if (ret)
		return ret;

	vm->scratch_pt = alloc_pt(dev);
	if (IS_ERR(vm->scratch_pt)) {
		cleanup_scratch_page(dev, &vm->scratch_page);
		return PTR_ERR(vm->scratch_pt);
	}

	gen6_initialize_pt(vm, vm->scratch_pt);

	return 0;
}

static void gen6_free_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;

	free_pt(dev, vm->scratch_pt);
	cleanup_scratch_page(dev, &vm->scratch_page);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory *pd = &ppgtt->pd;
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	drm_mm_remove_node(&ppgtt->node);

	gen6_for_all_pdes(pt, pd, pde)
		if (pt != vm->scratch_pt)
			free_pt(dev, pt);

	gen6_free_scratch(vm);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
1996 */ 1997 BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); 1998 1999 ret = gen6_init_scratch(vm); 2000 if (ret) 2001 return ret; 2002 2003 alloc: 2004 ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, 2005 &ppgtt->node, GEN6_PD_SIZE, 2006 GEN6_PD_ALIGN, 0, 2007 0, ggtt->base.total, 2008 DRM_MM_TOPDOWN); 2009 if (ret == -ENOSPC && !retried) { 2010 ret = i915_gem_evict_something(&ggtt->base, 2011 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2012 I915_CACHE_NONE, 2013 0, ggtt->base.total, 2014 0); 2015 if (ret) 2016 goto err_out; 2017 2018 retried = true; 2019 goto alloc; 2020 } 2021 2022 if (ret) 2023 goto err_out; 2024 2025 2026 if (ppgtt->node.start < ggtt->mappable_end) 2027 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2028 2029 return 0; 2030 2031 err_out: 2032 gen6_free_scratch(vm); 2033 return ret; 2034 } 2035 2036 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2037 { 2038 return gen6_ppgtt_allocate_page_directories(ppgtt); 2039 } 2040 2041 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2042 uint64_t start, uint64_t length) 2043 { 2044 struct i915_page_table *unused; 2045 uint32_t pde; 2046 2047 gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) 2048 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2049 } 2050 2051 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2052 { 2053 struct drm_device *dev = ppgtt->base.dev; 2054 struct drm_i915_private *dev_priv = to_i915(dev); 2055 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2056 int ret; 2057 2058 ppgtt->base.pte_encode = ggtt->base.pte_encode; 2059 if (intel_vgpu_active(dev_priv) || IS_GEN6(dev)) 2060 ppgtt->switch_mm = gen6_mm_switch; 2061 else if (IS_HASWELL(dev)) 2062 ppgtt->switch_mm = hsw_mm_switch; 2063 else if (IS_GEN7(dev)) 2064 ppgtt->switch_mm = gen7_mm_switch; 2065 else 2066 BUG(); 2067 2068 ret = gen6_ppgtt_alloc(ppgtt); 2069 if (ret) 2070 return ret; 2071 2072 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2073 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2074 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2075 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2076 ppgtt->base.bind_vma = ppgtt_bind_vma; 2077 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2078 ppgtt->base.start = 0; 2079 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2080 ppgtt->debug_dump = gen6_dump_ppgtt; 2081 2082 ppgtt->pd.base.ggtt_offset = 2083 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2084 2085 ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + 2086 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2087 2088 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2089 2090 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2091 2092 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2093 ppgtt->node.size >> 20, 2094 ppgtt->node.start / PAGE_SIZE); 2095 2096 DRM_DEBUG("Adding PPGTT at offset %x\n", 2097 ppgtt->pd.base.ggtt_offset << 10); 2098 2099 return 0; 2100 } 2101 2102 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2103 struct drm_i915_private *dev_priv) 2104 { 2105 ppgtt->base.dev = &dev_priv->drm; 2106 2107 if (INTEL_INFO(dev_priv)->gen < 8) 2108 return gen6_ppgtt_init(ppgtt); 2109 else 2110 return gen8_ppgtt_init(ppgtt); 2111 } 2112 2113 static void i915_address_space_init(struct i915_address_space *vm, 2114 struct drm_i915_private *dev_priv) 2115 { 2116 drm_mm_init(&vm->mm, vm->start, vm->total); 2117 INIT_LIST_HEAD(&vm->active_list); 2118 INIT_LIST_HEAD(&vm->inactive_list); 2119 INIT_LIST_HEAD(&vm->unbound_list); 2120 
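/* Every address space, the single GGTT as well as each PPGTT, is kept on
 * dev_priv->vm_list; on pre-gen8 platforms i915_gem_restore_gtt_mappings()
 * walks this list to rewrite the PPGTT page directories after resume.
 */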
list_add_tail(&vm->global_link, &dev_priv->vm_list); 2121 } 2122 2123 static void gtt_write_workarounds(struct drm_device *dev) 2124 { 2125 struct drm_i915_private *dev_priv = to_i915(dev); 2126 2127 /* This function is for gtt related workarounds. This function is 2128 * called on driver load and after a GPU reset, so you can place 2129 * workarounds here even if they get overwritten by GPU reset. 2130 */ 2131 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ 2132 if (IS_BROADWELL(dev)) 2133 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 2134 else if (IS_CHERRYVIEW(dev)) 2135 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 2136 else if (IS_SKYLAKE(dev)) 2137 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 2138 else if (IS_BROXTON(dev)) 2139 I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 2140 } 2141 2142 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, 2143 struct drm_i915_private *dev_priv, 2144 struct drm_i915_file_private *file_priv) 2145 { 2146 int ret; 2147 2148 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2149 if (ret == 0) { 2150 kref_init(&ppgtt->ref); 2151 i915_address_space_init(&ppgtt->base, dev_priv); 2152 ppgtt->base.file = file_priv; 2153 } 2154 2155 return ret; 2156 } 2157 2158 int i915_ppgtt_init_hw(struct drm_device *dev) 2159 { 2160 gtt_write_workarounds(dev); 2161 2162 /* In the case of execlists, PPGTT is enabled by the context descriptor 2163 * and the PDPs are contained within the context itself. We don't 2164 * need to do anything here. */ 2165 if (i915.enable_execlists) 2166 return 0; 2167 2168 if (!USES_PPGTT(dev)) 2169 return 0; 2170 2171 if (IS_GEN6(dev)) 2172 gen6_ppgtt_enable(dev); 2173 else if (IS_GEN7(dev)) 2174 gen7_ppgtt_enable(dev); 2175 else if (INTEL_INFO(dev)->gen >= 8) 2176 gen8_ppgtt_enable(dev); 2177 else 2178 MISSING_CASE(INTEL_INFO(dev)->gen); 2179 2180 return 0; 2181 } 2182 2183 struct i915_hw_ppgtt * 2184 i915_ppgtt_create(struct drm_i915_private *dev_priv, 2185 struct drm_i915_file_private *fpriv) 2186 { 2187 struct i915_hw_ppgtt *ppgtt; 2188 int ret; 2189 2190 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2191 if (!ppgtt) 2192 return ERR_PTR(-ENOMEM); 2193 2194 ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); 2195 if (ret) { 2196 kfree(ppgtt); 2197 return ERR_PTR(ret); 2198 } 2199 2200 trace_i915_ppgtt_create(&ppgtt->base); 2201 2202 return ppgtt; 2203 } 2204 2205 void i915_ppgtt_release(struct kref *kref) 2206 { 2207 struct i915_hw_ppgtt *ppgtt = 2208 container_of(kref, struct i915_hw_ppgtt, ref); 2209 2210 trace_i915_ppgtt_release(&ppgtt->base); 2211 2212 /* vmas should already be unbound and destroyed */ 2213 WARN_ON(!list_empty(&ppgtt->base.active_list)); 2214 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 2215 WARN_ON(!list_empty(&ppgtt->base.unbound_list)); 2216 2217 list_del(&ppgtt->base.global_link); 2218 drm_mm_takedown(&ppgtt->base.mm); 2219 2220 ppgtt->base.cleanup(&ppgtt->base); 2221 kfree(ppgtt); 2222 } 2223 2224 /* Certain Gen5 chipsets require require idling the GPU before 2225 * unmapping anything from the GTT when VT-d is enabled. 2226 */ 2227 static bool needs_idle_maps(struct drm_i915_private *dev_priv) 2228 { 2229 #ifdef CONFIG_INTEL_IOMMU 2230 /* Query intel_iommu to see if we need the workaround. Presumably that 2231 * was loaded first. 
2232 */ 2233 if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped) 2234 return true; 2235 #endif 2236 return false; 2237 } 2238 2239 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv) 2240 { 2241 struct intel_engine_cs *engine; 2242 2243 if (INTEL_INFO(dev_priv)->gen < 6) 2244 return; 2245 2246 for_each_engine(engine, dev_priv) { 2247 u32 fault_reg; 2248 fault_reg = I915_READ(RING_FAULT_REG(engine)); 2249 if (fault_reg & RING_FAULT_VALID) { 2250 DRM_DEBUG_DRIVER("Unexpected fault\n" 2251 "\tAddr: 0x%08lx\n" 2252 "\tAddress space: %s\n" 2253 "\tSource ID: %d\n" 2254 "\tType: %d\n", 2255 fault_reg & PAGE_MASK, 2256 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2257 RING_FAULT_SRCID(fault_reg), 2258 RING_FAULT_FAULT_TYPE(fault_reg)); 2259 I915_WRITE(RING_FAULT_REG(engine), 2260 fault_reg & ~RING_FAULT_VALID); 2261 } 2262 } 2263 POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS])); 2264 } 2265 2266 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2267 { 2268 if (INTEL_INFO(dev_priv)->gen < 6) { 2269 intel_gtt_chipset_flush(); 2270 } else { 2271 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2272 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2273 } 2274 } 2275 2276 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2277 { 2278 struct drm_i915_private *dev_priv = to_i915(dev); 2279 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2280 2281 /* Don't bother messing with faults pre GEN6 as we have little 2282 * documentation supporting that it's a good idea. 2283 */ 2284 if (INTEL_INFO(dev)->gen < 6) 2285 return; 2286 2287 i915_check_and_clear_faults(dev_priv); 2288 2289 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 2290 true); 2291 2292 i915_ggtt_flush(dev_priv); 2293 } 2294 2295 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 2296 { 2297 if (!dma_map_sg(&obj->base.dev->pdev->dev, 2298 obj->pages->sgl, obj->pages->nents, 2299 PCI_DMA_BIDIRECTIONAL)) 2300 return -ENOSPC; 2301 2302 return 0; 2303 } 2304 2305 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) 2306 { 2307 writeq(pte, addr); 2308 } 2309 2310 static void gen8_ggtt_insert_page(struct i915_address_space *vm, 2311 dma_addr_t addr, 2312 uint64_t offset, 2313 enum i915_cache_level level, 2314 u32 unused) 2315 { 2316 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2317 gen8_pte_t __iomem *pte = 2318 (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + 2319 (offset >> PAGE_SHIFT); 2320 int rpm_atomic_seq; 2321 2322 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2323 2324 gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); 2325 2326 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2327 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2328 2329 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2330 } 2331 2332 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 2333 struct sg_table *st, 2334 uint64_t start, 2335 enum i915_cache_level level, u32 unused) 2336 { 2337 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2338 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2339 struct sgt_iter sgt_iter; 2340 gen8_pte_t __iomem *gtt_entries; 2341 gen8_pte_t gtt_entry; 2342 dma_addr_t addr; 2343 int rpm_atomic_seq; 2344 int i = 0; 2345 2346 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2347 2348 gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2349 2350 for_each_sgt_dma(addr, sgt_iter, st) { 2351 gtt_entry = gen8_pte_encode(addr, level, true); 2352 gen8_set_pte(&gtt_entries[i++], gtt_entry); 2353 } 2354 2355 /* 2356 *
XXX: This serves as a posting read to make sure that the PTE has 2357 * actually been updated. There is some concern that even though 2358 * registers and PTEs are within the same BAR that they are potentially 2359 * of NUMA access patterns. Therefore, even with the way we assume 2360 * hardware should work, we must keep this posting read for paranoia. 2361 */ 2362 if (i != 0) 2363 WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry); 2364 2365 /* This next bit makes the above posting read even more important. We 2366 * want to flush the TLBs only after we're certain all the PTE updates 2367 * have finished. 2368 */ 2369 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2370 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2371 2372 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2373 } 2374 2375 struct insert_entries { 2376 struct i915_address_space *vm; 2377 struct sg_table *st; 2378 uint64_t start; 2379 enum i915_cache_level level; 2380 u32 flags; 2381 }; 2382 2383 static int gen8_ggtt_insert_entries__cb(void *_arg) 2384 { 2385 struct insert_entries *arg = _arg; 2386 gen8_ggtt_insert_entries(arg->vm, arg->st, 2387 arg->start, arg->level, arg->flags); 2388 return 0; 2389 } 2390 2391 static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, 2392 struct sg_table *st, 2393 uint64_t start, 2394 enum i915_cache_level level, 2395 u32 flags) 2396 { 2397 struct insert_entries arg = { vm, st, start, level, flags }; 2398 stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); 2399 } 2400 2401 static void gen6_ggtt_insert_page(struct i915_address_space *vm, 2402 dma_addr_t addr, 2403 uint64_t offset, 2404 enum i915_cache_level level, 2405 u32 flags) 2406 { 2407 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2408 gen6_pte_t __iomem *pte = 2409 (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + 2410 (offset >> PAGE_SHIFT); 2411 int rpm_atomic_seq; 2412 2413 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2414 2415 iowrite32(vm->pte_encode(addr, level, true, flags), pte); 2416 2417 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2418 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2419 2420 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2421 } 2422 2423 /* 2424 * Binds an object into the global gtt with the specified cache level. The object 2425 * will be accessible to the GPU via commands whose operands reference offsets 2426 * within the global GTT as well as accessible by the GPU through the GMADR 2427 * mapped BAR (dev_priv->mm.gtt->gtt). 2428 */ 2429 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 2430 struct sg_table *st, 2431 uint64_t start, 2432 enum i915_cache_level level, u32 flags) 2433 { 2434 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2435 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2436 struct sgt_iter sgt_iter; 2437 gen6_pte_t __iomem *gtt_entries; 2438 gen6_pte_t gtt_entry; 2439 dma_addr_t addr; 2440 int rpm_atomic_seq; 2441 int i = 0; 2442 2443 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2444 2445 gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); 2446 2447 for_each_sgt_dma(addr, sgt_iter, st) { 2448 gtt_entry = vm->pte_encode(addr, level, true, flags); 2449 iowrite32(gtt_entry, &gtt_entries[i++]); 2450 } 2451 2452 /* XXX: This serves as a posting read to make sure that the PTE has 2453 * actually been updated. There is some concern that even though 2454 * registers and PTEs are within the same BAR that they are potentially 2455 * of NUMA access patterns.
Therefore, even with the way we assume 2456 * hardware should work, we must keep this posting read for paranoia. 2457 */ 2458 if (i != 0) 2459 WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry); 2460 2461 /* This next bit makes the above posting read even more important. We 2462 * want to flush the TLBs only after we're certain all the PTE updates 2463 * have finished. 2464 */ 2465 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2466 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2467 2468 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2469 } 2470 2471 static void nop_clear_range(struct i915_address_space *vm, 2472 uint64_t start, 2473 uint64_t length, 2474 bool use_scratch) 2475 { 2476 } 2477 2478 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 2479 uint64_t start, 2480 uint64_t length, 2481 bool use_scratch) 2482 { 2483 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2484 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2485 unsigned first_entry = start >> PAGE_SHIFT; 2486 unsigned num_entries = length >> PAGE_SHIFT; 2487 gen8_pte_t scratch_pte, __iomem *gtt_base = 2488 (gen8_pte_t __iomem *)ggtt->gsm + first_entry; 2489 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2490 int i; 2491 int rpm_atomic_seq; 2492 2493 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2494 2495 if (WARN(num_entries > max_entries, 2496 "First entry = %d; Num entries = %d (max=%d)\n", 2497 first_entry, num_entries, max_entries)) 2498 num_entries = max_entries; 2499 2500 scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, 2501 I915_CACHE_LLC, 2502 use_scratch); 2503 for (i = 0; i < num_entries; i++) 2504 gen8_set_pte(&gtt_base[i], scratch_pte); 2505 readl(gtt_base); 2506 2507 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2508 } 2509 2510 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 2511 uint64_t start, 2512 uint64_t length, 2513 bool use_scratch) 2514 { 2515 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2516 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2517 unsigned first_entry = start >> PAGE_SHIFT; 2518 unsigned num_entries = length >> PAGE_SHIFT; 2519 gen6_pte_t scratch_pte, __iomem *gtt_base = 2520 (gen6_pte_t __iomem *)ggtt->gsm + first_entry; 2521 const int max_entries = ggtt_total_entries(ggtt) - first_entry; 2522 int i; 2523 int rpm_atomic_seq; 2524 2525 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2526 2527 if (WARN(num_entries > max_entries, 2528 "First entry = %d; Num entries = %d (max=%d)\n", 2529 first_entry, num_entries, max_entries)) 2530 num_entries = max_entries; 2531 2532 scratch_pte = vm->pte_encode(vm->scratch_page.daddr, 2533 I915_CACHE_LLC, use_scratch, 0); 2534 2535 for (i = 0; i < num_entries; i++) 2536 iowrite32(scratch_pte, &gtt_base[i]); 2537 readl(gtt_base); 2538 2539 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2540 } 2541 2542 static void i915_ggtt_insert_page(struct i915_address_space *vm, 2543 dma_addr_t addr, 2544 uint64_t offset, 2545 enum i915_cache_level cache_level, 2546 u32 unused) 2547 { 2548 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2549 unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2550 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2551 int rpm_atomic_seq; 2552 2553 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2554 2555 intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); 2556 2557 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2558 } 2559 2560 static void i915_ggtt_insert_entries(struct i915_address_space *vm, 2561 struct sg_table *pages, 2562 uint64_t start, 2563 enum i915_cache_level cache_level, u32 unused) 2564 { 2565 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2566 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 2567 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 2568 int rpm_atomic_seq; 2569 2570 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2571 2572 intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); 2573 2574 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2575 2576 } 2577 2578 static void i915_ggtt_clear_range(struct i915_address_space *vm, 2579 uint64_t start, 2580 uint64_t length, 2581 bool unused) 2582 { 2583 struct drm_i915_private *dev_priv = to_i915(vm->dev); 2584 unsigned first_entry = start >> PAGE_SHIFT; 2585 unsigned num_entries = length >> PAGE_SHIFT; 2586 int rpm_atomic_seq; 2587 2588 rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); 2589 2590 intel_gtt_clear_range(first_entry, num_entries); 2591 2592 assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 2593 } 2594 2595 static int ggtt_bind_vma(struct i915_vma *vma, 2596 enum i915_cache_level cache_level, 2597 u32 flags) 2598 { 2599 struct drm_i915_gem_object *obj = vma->obj; 2600 u32 pte_flags = 0; 2601 int ret; 2602 2603 ret = i915_get_ggtt_vma_pages(vma); 2604 if (ret) 2605 return ret; 2606 2607 /* Currently applicable only to VLV */ 2608 if (obj->gt_ro) 2609 pte_flags |= PTE_READ_ONLY; 2610 2611 vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, 2612 cache_level, pte_flags); 2613 2614 /* 2615 * Without aliasing PPGTT there's no difference between 2616 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally 2617 * upgrade to both bound if we bind either to avoid double-binding. 
2618 */ 2619 vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; 2620 2621 return 0; 2622 } 2623 2624 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2625 enum i915_cache_level cache_level, 2626 u32 flags) 2627 { 2628 u32 pte_flags; 2629 int ret; 2630 2631 ret = i915_get_ggtt_vma_pages(vma); 2632 if (ret) 2633 return ret; 2634 2635 /* Currently applicable only to VLV */ 2636 pte_flags = 0; 2637 if (vma->obj->gt_ro) 2638 pte_flags |= PTE_READ_ONLY; 2639 2640 2641 if (flags & I915_VMA_GLOBAL_BIND) { 2642 vma->vm->insert_entries(vma->vm, 2643 vma->pages, vma->node.start, 2644 cache_level, pte_flags); 2645 } 2646 2647 if (flags & I915_VMA_LOCAL_BIND) { 2648 struct i915_hw_ppgtt *appgtt = 2649 to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2650 appgtt->base.insert_entries(&appgtt->base, 2651 vma->pages, vma->node.start, 2652 cache_level, pte_flags); 2653 } 2654 2655 return 0; 2656 } 2657 2658 static void ggtt_unbind_vma(struct i915_vma *vma) 2659 { 2660 struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; 2661 const u64 size = min(vma->size, vma->node.size); 2662 2663 if (vma->flags & I915_VMA_GLOBAL_BIND) 2664 vma->vm->clear_range(vma->vm, 2665 vma->node.start, size, 2666 true); 2667 2668 if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) 2669 appgtt->base.clear_range(&appgtt->base, 2670 vma->node.start, size, 2671 true); 2672 } 2673 2674 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2675 { 2676 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 2677 struct device *kdev = &dev_priv->drm.pdev->dev; 2678 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2679 2680 if (unlikely(ggtt->do_idle_maps)) { 2681 if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { 2682 DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); 2683 /* Wait a bit, in hopes it avoids the hang */ 2684 udelay(10); 2685 } 2686 } 2687 2688 dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents, 2689 PCI_DMA_BIDIRECTIONAL); 2690 } 2691 2692 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2693 unsigned long color, 2694 u64 *start, 2695 u64 *end) 2696 { 2697 if (node->color != color) 2698 *start += 4096; 2699 2700 node = list_first_entry_or_null(&node->node_list, 2701 struct drm_mm_node, 2702 node_list); 2703 if (node && node->allocated && node->color != color) 2704 *end -= 4096; 2705 } 2706 2707 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) 2708 { 2709 /* Let GEM Manage all of the aperture. 2710 * 2711 * However, leave one page at the end still bound to the scratch page. 2712 * There are a number of places where the hardware apparently prefetches 2713 * past the end of the object, and we've seen multiple hangs with the 2714 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2715 * aperture. One page should be enough to keep any prefetching inside 2716 * of the aperture. 
2717 */ 2718 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2719 unsigned long hole_start, hole_end; 2720 struct drm_mm_node *entry; 2721 int ret; 2722 2723 ret = intel_vgt_balloon(dev_priv); 2724 if (ret) 2725 return ret; 2726 2727 /* Clear any non-preallocated blocks */ 2728 drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { 2729 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2730 hole_start, hole_end); 2731 ggtt->base.clear_range(&ggtt->base, hole_start, 2732 hole_end - hole_start, true); 2733 } 2734 2735 /* And finally clear the reserved guard page */ 2736 ggtt->base.clear_range(&ggtt->base, 2737 ggtt->base.total - PAGE_SIZE, PAGE_SIZE, 2738 true); 2739 2740 if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { 2741 struct i915_hw_ppgtt *ppgtt; 2742 2743 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2744 if (!ppgtt) 2745 return -ENOMEM; 2746 2747 ret = __hw_ppgtt_init(ppgtt, dev_priv); 2748 if (ret) { 2749 kfree(ppgtt); 2750 return ret; 2751 } 2752 2753 if (ppgtt->base.allocate_va_range) 2754 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2755 ppgtt->base.total); 2756 if (ret) { 2757 ppgtt->base.cleanup(&ppgtt->base); 2758 kfree(ppgtt); 2759 return ret; 2760 } 2761 2762 ppgtt->base.clear_range(&ppgtt->base, 2763 ppgtt->base.start, 2764 ppgtt->base.total, 2765 true); 2766 2767 dev_priv->mm.aliasing_ppgtt = ppgtt; 2768 WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); 2769 ggtt->base.bind_vma = aliasing_gtt_bind_vma; 2770 } 2771 2772 return 0; 2773 } 2774 2775 /** 2776 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization 2777 * @dev_priv: i915 device 2778 */ 2779 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) 2780 { 2781 struct i915_ggtt *ggtt = &dev_priv->ggtt; 2782 2783 if (dev_priv->mm.aliasing_ppgtt) { 2784 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2785 ppgtt->base.cleanup(&ppgtt->base); 2786 kfree(ppgtt); 2787 } 2788 2789 i915_gem_cleanup_stolen(&dev_priv->drm); 2790 2791 if (drm_mm_initialized(&ggtt->base.mm)) { 2792 intel_vgt_deballoon(dev_priv); 2793 2794 drm_mm_takedown(&ggtt->base.mm); 2795 list_del(&ggtt->base.global_link); 2796 } 2797 2798 ggtt->base.cleanup(&ggtt->base); 2799 2800 arch_phys_wc_del(ggtt->mtrr); 2801 io_mapping_fini(&ggtt->mappable); 2802 } 2803 2804 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2805 { 2806 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2807 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2808 return snb_gmch_ctl << 20; 2809 } 2810 2811 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2812 { 2813 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2814 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2815 if (bdw_gmch_ctl) 2816 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2817 2818 #ifdef CONFIG_X86_32 2819 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2820 if (bdw_gmch_ctl > 4) 2821 bdw_gmch_ctl = 4; 2822 #endif 2823 2824 return bdw_gmch_ctl << 20; 2825 } 2826 2827 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2828 { 2829 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2830 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2831 2832 if (gmch_ctrl) 2833 return 1 << (20 + gmch_ctrl); 2834 2835 return 0; 2836 } 2837 2838 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2839 { 2840 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2841 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2842 return snb_gmch_ctl << 25; /* 32 MB units */ 2843 } 2844 2845 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2846 { 2847 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2848 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2849 return bdw_gmch_ctl << 25; /* 
32 MB units */ 2850 } 2851 2852 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2853 { 2854 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2855 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2856 2857 /* 2858 * 0x0 to 0x10: 32MB increments starting at 0MB 2859 * 0x11 to 0x16: 4MB increments starting at 8MB 2860 * 0x17 to 0x1d: 4MB increments start at 36MB 2861 */ 2862 if (gmch_ctrl < 0x11) 2863 return gmch_ctrl << 25; 2864 else if (gmch_ctrl < 0x17) 2865 return (gmch_ctrl - 0x11 + 2) << 22; 2866 else 2867 return (gmch_ctrl - 0x17 + 9) << 22; 2868 } 2869 2870 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2871 { 2872 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2873 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2874 2875 if (gen9_gmch_ctl < 0xf0) 2876 return gen9_gmch_ctl << 25; /* 32 MB units */ 2877 else 2878 /* 4MB increments starting at 0xf0 for 4MB */ 2879 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2880 } 2881 2882 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) 2883 { 2884 struct pci_dev *pdev = ggtt->base.dev->pdev; 2885 phys_addr_t phys_addr; 2886 int ret; 2887 2888 /* For Modern GENs the PTEs and register space are split in the BAR */ 2889 phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2; 2890 2891 /* 2892 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2893 * dropped. For WC mappings in general we have 64 byte burst writes 2894 * when the WC buffer is flushed, so we can't use it, but have to 2895 * resort to an uncached mapping. The WC issue is easily caught by the 2896 * readback check when writing GTT PTE entries. 2897 */ 2898 if (IS_BROXTON(ggtt->base.dev)) 2899 ggtt->gsm = ioremap_nocache(phys_addr, size); 2900 else 2901 ggtt->gsm = ioremap_wc(phys_addr, size); 2902 if (!ggtt->gsm) { 2903 DRM_ERROR("Failed to map the ggtt page table\n"); 2904 return -ENOMEM; 2905 } 2906 2907 ret = setup_scratch_page(ggtt->base.dev, 2908 &ggtt->base.scratch_page, 2909 GFP_DMA32); 2910 if (ret) { 2911 DRM_ERROR("Scratch setup failed\n"); 2912 /* iounmap will also get called at remove, but meh */ 2913 iounmap(ggtt->gsm); 2914 return ret; 2915 } 2916 2917 return 0; 2918 } 2919 2920 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2921 * bits. When using advanced contexts each context stores its own PAT, but 2922 * writing this data shouldn't be harmful even in those cases. */ 2923 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2924 { 2925 uint64_t pat; 2926 2927 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2928 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2929 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2930 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2931 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2932 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2933 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2934 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2935 2936 if (!USES_PPGTT(dev_priv)) 2937 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2938 * so RTL will always use the value corresponding to 2939 * pat_sel = 000". 2940 * So let's disable cache for GGTT to avoid screen corruptions. 2941 * MOCS still can be used though. 2942 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2943 * before this patch, i.e. 
the same uncached + snooping access 2944 * like on gen6/7 seems to be in effect. 2945 * - So this just fixes blitter/render access. Again it looks 2946 * like it's not just uncached access, but uncached + snooping. 2947 * So we can still hold onto all our assumptions wrt cpu 2948 * clflushing on LLC machines. 2949 */ 2950 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2951 2952 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2953 * write would work. */ 2954 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2955 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2956 } 2957 2958 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2959 { 2960 uint64_t pat; 2961 2962 /* 2963 * Map WB on BDW to snooped on CHV. 2964 * 2965 * Only the snoop bit has meaning for CHV, the rest is 2966 * ignored. 2967 * 2968 * The hardware will never snoop for certain types of accesses: 2969 * - CPU GTT (GMADR->GGTT->no snoop->memory) 2970 * - PPGTT page tables 2971 * - some other special cycles 2972 * 2973 * As with BDW, we also need to consider the following for GT accesses: 2974 * "For GGTT, there is NO pat_sel[2:0] from the entry, 2975 * so RTL will always use the value corresponding to 2976 * pat_sel = 000". 2977 * Which means we must set the snoop bit in PAT entry 0 2978 * in order to keep the global status page working. 2979 */ 2980 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 2981 GEN8_PPAT(1, 0) | 2982 GEN8_PPAT(2, 0) | 2983 GEN8_PPAT(3, 0) | 2984 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 2985 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 2986 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 2987 GEN8_PPAT(7, CHV_PPAT_SNOOP); 2988 2989 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2990 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2991 } 2992 2993 static void gen6_gmch_remove(struct i915_address_space *vm) 2994 { 2995 struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); 2996 2997 iounmap(ggtt->gsm); 2998 cleanup_scratch_page(vm->dev, &vm->scratch_page); 2999 } 3000 3001 static int gen8_gmch_probe(struct i915_ggtt *ggtt) 3002 { 3003 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3004 struct pci_dev *pdev = dev_priv->drm.pdev; 3005 unsigned int size; 3006 u16 snb_gmch_ctl; 3007 3008 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3009 ggtt->mappable_base = pci_resource_start(pdev, 2); 3010 ggtt->mappable_end = pci_resource_len(pdev, 2); 3011 3012 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39))) 3013 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39)); 3014 3015 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3016 3017 if (INTEL_GEN(dev_priv) >= 9) { 3018 ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); 3019 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3020 } else if (IS_CHERRYVIEW(dev_priv)) { 3021 ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); 3022 size = chv_get_total_gtt_size(snb_gmch_ctl); 3023 } else { 3024 ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); 3025 size = gen8_get_total_gtt_size(snb_gmch_ctl); 3026 } 3027 3028 ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3029 3030 if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) 3031 chv_setup_private_ppat(dev_priv); 3032 else 3033 bdw_setup_private_ppat(dev_priv); 3034 3035 ggtt->base.cleanup = gen6_gmch_remove; 3036 ggtt->base.bind_vma = ggtt_bind_vma; 3037 ggtt->base.unbind_vma = ggtt_unbind_vma; 3038 ggtt->base.insert_page = gen8_ggtt_insert_page; 3039 ggtt->base.clear_range = nop_clear_range; 3040 if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) 3041 ggtt->base.clear_range = gen8_ggtt_clear_range; 3042 3043 
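/* gen8_ggtt_insert_entries__BKL() below simply runs the normal
 * gen8_ggtt_insert_entries() under stop_machine(), keeping every other CPU
 * out of the GGTT while its PTEs are rewritten. Cherryview is the only
 * platform given this serialised path here; the underlying hardware reason
 * is not spelled out in this file.
 */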
ggtt->base.insert_entries = gen8_ggtt_insert_entries; 3044 if (IS_CHERRYVIEW(dev_priv)) 3045 ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; 3046 3047 return ggtt_probe_common(ggtt, size); 3048 } 3049 3050 static int gen6_gmch_probe(struct i915_ggtt *ggtt) 3051 { 3052 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3053 struct pci_dev *pdev = dev_priv->drm.pdev; 3054 unsigned int size; 3055 u16 snb_gmch_ctl; 3056 3057 ggtt->mappable_base = pci_resource_start(pdev, 2); 3058 ggtt->mappable_end = pci_resource_len(pdev, 2); 3059 3060 /* 64/512MB is the current min/max we actually know of, but this is just 3061 * a coarse sanity check. 3062 */ 3063 if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { 3064 DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); 3065 return -ENXIO; 3066 } 3067 3068 if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) 3069 pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); 3070 pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3071 3072 ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); 3073 3074 size = gen6_get_total_gtt_size(snb_gmch_ctl); 3075 ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; 3076 3077 ggtt->base.clear_range = gen6_ggtt_clear_range; 3078 ggtt->base.insert_page = gen6_ggtt_insert_page; 3079 ggtt->base.insert_entries = gen6_ggtt_insert_entries; 3080 ggtt->base.bind_vma = ggtt_bind_vma; 3081 ggtt->base.unbind_vma = ggtt_unbind_vma; 3082 ggtt->base.cleanup = gen6_gmch_remove; 3083 3084 if (HAS_EDRAM(dev_priv)) 3085 ggtt->base.pte_encode = iris_pte_encode; 3086 else if (IS_HASWELL(dev_priv)) 3087 ggtt->base.pte_encode = hsw_pte_encode; 3088 else if (IS_VALLEYVIEW(dev_priv)) 3089 ggtt->base.pte_encode = byt_pte_encode; 3090 else if (INTEL_GEN(dev_priv) >= 7) 3091 ggtt->base.pte_encode = ivb_pte_encode; 3092 else 3093 ggtt->base.pte_encode = snb_pte_encode; 3094 3095 return ggtt_probe_common(ggtt, size); 3096 } 3097 3098 static void i915_gmch_remove(struct i915_address_space *vm) 3099 { 3100 intel_gmch_remove(); 3101 } 3102 3103 static int i915_gmch_probe(struct i915_ggtt *ggtt) 3104 { 3105 struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev); 3106 int ret; 3107 3108 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); 3109 if (!ret) { 3110 DRM_ERROR("failed to set up gmch\n"); 3111 return -EIO; 3112 } 3113 3114 intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size, 3115 &ggtt->mappable_base, &ggtt->mappable_end); 3116 3117 ggtt->do_idle_maps = needs_idle_maps(dev_priv); 3118 ggtt->base.insert_page = i915_ggtt_insert_page; 3119 ggtt->base.insert_entries = i915_ggtt_insert_entries; 3120 ggtt->base.clear_range = i915_ggtt_clear_range; 3121 ggtt->base.bind_vma = ggtt_bind_vma; 3122 ggtt->base.unbind_vma = ggtt_unbind_vma; 3123 ggtt->base.cleanup = i915_gmch_remove; 3124 3125 if (unlikely(ggtt->do_idle_maps)) 3126 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 3127 3128 return 0; 3129 } 3130 3131 /** 3132 * i915_ggtt_probe_hw - Probe GGTT hardware location 3133 * @dev_priv: i915 device 3134 */ 3135 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) 3136 { 3137 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3138 int ret; 3139 3140 ggtt->base.dev = &dev_priv->drm; 3141 3142 if (INTEL_GEN(dev_priv) <= 5) 3143 ret = i915_gmch_probe(ggtt); 3144 else if (INTEL_GEN(dev_priv) < 8) 3145 ret = gen6_gmch_probe(ggtt); 3146 else 3147 ret = gen8_gmch_probe(ggtt); 3148 if (ret) 3149 return ret; 3150 3151 if ((ggtt->base.total - 1) >> 32) { 3152 DRM_ERROR("We never 
expected a Global GTT with more than 32bits" 3153 " of address space! Found %lldM!\n", 3154 ggtt->base.total >> 20); 3155 ggtt->base.total = 1ULL << 32; 3156 ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); 3157 } 3158 3159 if (ggtt->mappable_end > ggtt->base.total) { 3160 DRM_ERROR("mappable aperture extends past end of GGTT," 3161 " aperture=%llx, total=%llx\n", 3162 ggtt->mappable_end, ggtt->base.total); 3163 ggtt->mappable_end = ggtt->base.total; 3164 } 3165 3166 /* GMADR is the PCI mmio aperture into the global GTT. */ 3167 DRM_INFO("Memory usable by graphics device = %lluM\n", 3168 ggtt->base.total >> 20); 3169 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); 3170 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20); 3171 #ifdef CONFIG_INTEL_IOMMU 3172 if (intel_iommu_gfx_mapped) 3173 DRM_INFO("VT-d active for gfx access\n"); 3174 #endif 3175 3176 return 0; 3177 } 3178 3179 /** 3180 * i915_ggtt_init_hw - Initialize GGTT hardware 3181 * @dev_priv: i915 device 3182 */ 3183 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) 3184 { 3185 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3186 int ret; 3187 3188 INIT_LIST_HEAD(&dev_priv->vm_list); 3189 3190 /* Subtract the guard page before address space initialization to 3191 * shrink the range used by drm_mm. 3192 */ 3193 ggtt->base.total -= PAGE_SIZE; 3194 i915_address_space_init(&ggtt->base, dev_priv); 3195 ggtt->base.total += PAGE_SIZE; 3196 if (!HAS_LLC(dev_priv)) 3197 ggtt->base.mm.color_adjust = i915_gtt_color_adjust; 3198 3199 if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, 3200 dev_priv->ggtt.mappable_base, 3201 dev_priv->ggtt.mappable_end)) { 3202 ret = -EIO; 3203 goto out_gtt_cleanup; 3204 } 3205 3206 ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); 3207 3208 /* 3209 * Initialise stolen early so that we may reserve preallocated 3210 * objects for the BIOS to KMS transition. 3211 */ 3212 ret = i915_gem_init_stolen(&dev_priv->drm); 3213 if (ret) 3214 goto out_gtt_cleanup; 3215 3216 return 0; 3217 3218 out_gtt_cleanup: 3219 ggtt->base.cleanup(&ggtt->base); 3220 return ret; 3221 } 3222 3223 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) 3224 { 3225 if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) 3226 return -EIO; 3227 3228 return 0; 3229 } 3230 3231 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3232 { 3233 struct drm_i915_private *dev_priv = to_i915(dev); 3234 struct i915_ggtt *ggtt = &dev_priv->ggtt; 3235 struct drm_i915_gem_object *obj, *on; 3236 3237 i915_check_and_clear_faults(dev_priv); 3238 3239 /* First fill our portion of the GTT with scratch pages */ 3240 ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total, 3241 true); 3242 3243 ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ 3244 3245 /* clflush objects bound into the GGTT and rebind them. 
*/ 3246 list_for_each_entry_safe(obj, on, 3247 &dev_priv->mm.bound_list, global_list) { 3248 bool ggtt_bound = false; 3249 struct i915_vma *vma; 3250 3251 list_for_each_entry(vma, &obj->vma_list, obj_link) { 3252 if (vma->vm != &ggtt->base) 3253 continue; 3254 3255 if (!i915_vma_unbind(vma)) 3256 continue; 3257 3258 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3259 PIN_UPDATE)); 3260 ggtt_bound = true; 3261 } 3262 3263 if (ggtt_bound) 3264 WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); 3265 } 3266 3267 ggtt->base.closed = false; 3268 3269 if (INTEL_INFO(dev)->gen >= 8) { 3270 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3271 chv_setup_private_ppat(dev_priv); 3272 else 3273 bdw_setup_private_ppat(dev_priv); 3274 3275 return; 3276 } 3277 3278 if (USES_PPGTT(dev)) { 3279 struct i915_address_space *vm; 3280 3281 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3282 /* TODO: Perhaps it shouldn't be gen6 specific */ 3283 3284 struct i915_hw_ppgtt *ppgtt; 3285 3286 if (i915_is_ggtt(vm)) 3287 ppgtt = dev_priv->mm.aliasing_ppgtt; 3288 else 3289 ppgtt = i915_vm_to_ppgtt(vm); 3290 3291 gen6_write_page_range(dev_priv, &ppgtt->pd, 3292 0, ppgtt->base.total); 3293 } 3294 } 3295 3296 i915_ggtt_flush(dev_priv); 3297 } 3298 3299 static void 3300 i915_vma_retire(struct i915_gem_active *active, 3301 struct drm_i915_gem_request *rq) 3302 { 3303 const unsigned int idx = rq->engine->id; 3304 struct i915_vma *vma = 3305 container_of(active, struct i915_vma, last_read[idx]); 3306 3307 GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); 3308 3309 i915_vma_clear_active(vma, idx); 3310 if (i915_vma_is_active(vma)) 3311 return; 3312 3313 list_move_tail(&vma->vm_link, &vma->vm->inactive_list); 3314 if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) 3315 WARN_ON(i915_vma_unbind(vma)); 3316 } 3317 3318 void i915_vma_destroy(struct i915_vma *vma) 3319 { 3320 GEM_BUG_ON(vma->node.allocated); 3321 GEM_BUG_ON(i915_vma_is_active(vma)); 3322 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3323 GEM_BUG_ON(vma->fence); 3324 3325 list_del(&vma->vm_link); 3326 if (!i915_vma_is_ggtt(vma)) 3327 i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); 3328 3329 kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); 3330 } 3331 3332 void i915_vma_close(struct i915_vma *vma) 3333 { 3334 GEM_BUG_ON(i915_vma_is_closed(vma)); 3335 vma->flags |= I915_VMA_CLOSED; 3336 3337 list_del_init(&vma->obj_link); 3338 if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) 3339 WARN_ON(i915_vma_unbind(vma)); 3340 } 3341 3342 static struct i915_vma * 3343 __i915_vma_create(struct drm_i915_gem_object *obj, 3344 struct i915_address_space *vm, 3345 const struct i915_ggtt_view *view) 3346 { 3347 struct i915_vma *vma; 3348 int i; 3349 3350 GEM_BUG_ON(vm->closed); 3351 3352 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); 3353 if (vma == NULL) 3354 return ERR_PTR(-ENOMEM); 3355 3356 INIT_LIST_HEAD(&vma->exec_list); 3357 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3358 init_request_active(&vma->last_read[i], i915_vma_retire); 3359 init_request_active(&vma->last_fence, NULL); 3360 list_add(&vma->vm_link, &vm->unbound_list); 3361 vma->vm = vm; 3362 vma->obj = obj; 3363 vma->size = obj->base.size; 3364 3365 if (view) { 3366 vma->ggtt_view = *view; 3367 if (view->type == I915_GGTT_VIEW_PARTIAL) { 3368 vma->size = view->params.partial.size; 3369 vma->size <<= PAGE_SHIFT; 3370 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3371 vma->size = 3372 intel_rotation_info_size(&view->params.rotated); 3373 vma->size <<= PAGE_SHIFT; 
3374 } 3375 } 3376 3377 if (i915_is_ggtt(vm)) { 3378 vma->flags |= I915_VMA_GGTT; 3379 } else { 3380 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3381 } 3382 3383 list_add_tail(&vma->obj_link, &obj->vma_list); 3384 return vma; 3385 } 3386 3387 static inline bool vma_matches(struct i915_vma *vma, 3388 struct i915_address_space *vm, 3389 const struct i915_ggtt_view *view) 3390 { 3391 if (vma->vm != vm) 3392 return false; 3393 3394 if (!i915_vma_is_ggtt(vma)) 3395 return true; 3396 3397 if (!view) 3398 return vma->ggtt_view.type == 0; 3399 3400 if (vma->ggtt_view.type != view->type) 3401 return false; 3402 3403 return memcmp(&vma->ggtt_view.params, 3404 &view->params, 3405 sizeof(view->params)) == 0; 3406 } 3407 3408 struct i915_vma * 3409 i915_vma_create(struct drm_i915_gem_object *obj, 3410 struct i915_address_space *vm, 3411 const struct i915_ggtt_view *view) 3412 { 3413 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3414 GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); 3415 3416 return __i915_vma_create(obj, vm, view); 3417 } 3418 3419 struct i915_vma * 3420 i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, 3421 struct i915_address_space *vm, 3422 const struct i915_ggtt_view *view) 3423 { 3424 struct i915_vma *vma; 3425 3426 list_for_each_entry_reverse(vma, &obj->vma_list, obj_link) 3427 if (vma_matches(vma, vm, view)) 3428 return vma; 3429 3430 return NULL; 3431 } 3432 3433 struct i915_vma * 3434 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3435 struct i915_address_space *vm, 3436 const struct i915_ggtt_view *view) 3437 { 3438 struct i915_vma *vma; 3439 3440 GEM_BUG_ON(view && !i915_is_ggtt(vm)); 3441 3442 vma = i915_gem_obj_to_vma(obj, vm, view); 3443 if (!vma) 3444 vma = __i915_vma_create(obj, vm, view); 3445 3446 GEM_BUG_ON(i915_vma_is_closed(vma)); 3447 return vma; 3448 } 3449 3450 static struct scatterlist * 3451 rotate_pages(const dma_addr_t *in, unsigned int offset, 3452 unsigned int width, unsigned int height, 3453 unsigned int stride, 3454 struct sg_table *st, struct scatterlist *sg) 3455 { 3456 unsigned int column, row; 3457 unsigned int src_idx; 3458 3459 for (column = 0; column < width; column++) { 3460 src_idx = stride * (height - 1) + column; 3461 for (row = 0; row < height; row++) { 3462 st->nents++; 3463 /* We don't need the pages, but need to initialize 3464 * the entries so the sg list can be happily traversed. 3465 * The only thing we need are DMA addresses. 3466 */ 3467 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3468 sg_dma_address(sg) = in[offset + src_idx]; 3469 sg_dma_len(sg) = PAGE_SIZE; 3470 sg = sg_next(sg); 3471 src_idx -= stride; 3472 } 3473 } 3474 3475 return sg; 3476 } 3477 3478 static struct sg_table * 3479 intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, 3480 struct drm_i915_gem_object *obj) 3481 { 3482 const size_t n_pages = obj->base.size / PAGE_SIZE; 3483 unsigned int size = intel_rotation_info_size(rot_info); 3484 struct sgt_iter sgt_iter; 3485 dma_addr_t dma_addr; 3486 unsigned long i; 3487 dma_addr_t *page_addr_list; 3488 struct sg_table *st; 3489 struct scatterlist *sg; 3490 int ret = -ENOMEM; 3491 3492 /* Allocate a temporary list of source pages for random access. */ 3493 page_addr_list = drm_malloc_gfp(n_pages, 3494 sizeof(dma_addr_t), 3495 GFP_TEMPORARY); 3496 if (!page_addr_list) 3497 return ERR_PTR(ret); 3498 3499 /* Allocate target SG list. 
*/ 3500 st = kmalloc(sizeof(*st), GFP_KERNEL); 3501 if (!st) 3502 goto err_st_alloc; 3503 3504 ret = sg_alloc_table(st, size, GFP_KERNEL); 3505 if (ret) 3506 goto err_sg_alloc; 3507 3508 /* Populate source page list from the object. */ 3509 i = 0; 3510 for_each_sgt_dma(dma_addr, sgt_iter, obj->pages) 3511 page_addr_list[i++] = dma_addr; 3512 3513 GEM_BUG_ON(i != n_pages); 3514 st->nents = 0; 3515 sg = st->sgl; 3516 3517 for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { 3518 sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, 3519 rot_info->plane[i].width, rot_info->plane[i].height, 3520 rot_info->plane[i].stride, st, sg); 3521 } 3522 3523 DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", 3524 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3525 3526 drm_free_large(page_addr_list); 3527 3528 return st; 3529 3530 err_sg_alloc: 3531 kfree(st); 3532 err_st_alloc: 3533 drm_free_large(page_addr_list); 3534 3535 DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", 3536 obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); 3537 3538 return ERR_PTR(ret); 3539 } 3540 3541 static struct sg_table * 3542 intel_partial_pages(const struct i915_ggtt_view *view, 3543 struct drm_i915_gem_object *obj) 3544 { 3545 struct sg_table *st; 3546 struct scatterlist *sg; 3547 struct sg_page_iter obj_sg_iter; 3548 int ret = -ENOMEM; 3549 3550 st = kmalloc(sizeof(*st), GFP_KERNEL); 3551 if (!st) 3552 goto err_st_alloc; 3553 3554 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3555 if (ret) 3556 goto err_sg_alloc; 3557 3558 sg = st->sgl; 3559 st->nents = 0; 3560 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3561 view->params.partial.offset) 3562 { 3563 if (st->nents >= view->params.partial.size) 3564 break; 3565 3566 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3567 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3568 sg_dma_len(sg) = PAGE_SIZE; 3569 3570 sg = sg_next(sg); 3571 st->nents++; 3572 } 3573 3574 return st; 3575 3576 err_sg_alloc: 3577 kfree(st); 3578 err_st_alloc: 3579 return ERR_PTR(ret); 3580 } 3581 3582 static int 3583 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3584 { 3585 int ret = 0; 3586 3587 if (vma->pages) 3588 return 0; 3589 3590 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3591 vma->pages = vma->obj->pages; 3592 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3593 vma->pages = 3594 intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); 3595 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3596 vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); 3597 else 3598 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3599 vma->ggtt_view.type); 3600 3601 if (!vma->pages) { 3602 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3603 vma->ggtt_view.type); 3604 ret = -EINVAL; 3605 } else if (IS_ERR(vma->pages)) { 3606 ret = PTR_ERR(vma->pages); 3607 vma->pages = NULL; 3608 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3609 vma->ggtt_view.type, ret); 3610 } 3611 3612 return ret; 3613 } 3614 3615 /** 3616 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 3617 * @vma: VMA to map 3618 * @cache_level: mapping cache level 3619 * @flags: flags like global or local mapping 3620 * 3621 * DMA addresses are taken from the scatter-gather table of this object (or of 3622 * this VMA in case of non-default GGTT views) and PTE entries set up. 
3623 * Note that DMA addresses are also the only part of the SG table we care about. 3624 */ 3625 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3626 u32 flags) 3627 { 3628 u32 bind_flags; 3629 u32 vma_flags; 3630 int ret; 3631 3632 if (WARN_ON(flags == 0)) 3633 return -EINVAL; 3634 3635 bind_flags = 0; 3636 if (flags & PIN_GLOBAL) 3637 bind_flags |= I915_VMA_GLOBAL_BIND; 3638 if (flags & PIN_USER) 3639 bind_flags |= I915_VMA_LOCAL_BIND; 3640 3641 vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); 3642 if (flags & PIN_UPDATE) 3643 bind_flags |= vma_flags; 3644 else 3645 bind_flags &= ~vma_flags; 3646 if (bind_flags == 0) 3647 return 0; 3648 3649 if (vma_flags == 0 && vma->vm->allocate_va_range) { 3650 trace_i915_va_alloc(vma); 3651 ret = vma->vm->allocate_va_range(vma->vm, 3652 vma->node.start, 3653 vma->node.size); 3654 if (ret) 3655 return ret; 3656 } 3657 3658 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3659 if (ret) 3660 return ret; 3661 3662 vma->flags |= bind_flags; 3663 return 0; 3664 } 3665 3666 void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) 3667 { 3668 void __iomem *ptr; 3669 3670 /* Access through the GTT requires the device to be awake. */ 3671 assert_rpm_wakelock_held(to_i915(vma->vm->dev)); 3672 3673 lockdep_assert_held(&vma->vm->dev->struct_mutex); 3674 if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) 3675 return IO_ERR_PTR(-ENODEV); 3676 3677 GEM_BUG_ON(!i915_vma_is_ggtt(vma)); 3678 GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); 3679 3680 ptr = vma->iomap; 3681 if (ptr == NULL) { 3682 ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, 3683 vma->node.start, 3684 vma->node.size); 3685 if (ptr == NULL) 3686 return IO_ERR_PTR(-ENOMEM); 3687 3688 vma->iomap = ptr; 3689 } 3690 3691 __i915_vma_pin(vma); 3692 return ptr; 3693 } 3694 3695 void i915_vma_unpin_and_release(struct i915_vma **p_vma) 3696 { 3697 struct i915_vma *vma; 3698 3699 vma = fetch_and_zero(p_vma); 3700 if (!vma) 3701 return; 3702 3703 i915_vma_unpin(vma); 3704 i915_vma_put(vma); 3705 } 3706
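/*
 * Illustrative sketch only (not driver code): a rough outline of how the
 * helpers in this file fit together to make an object visible through the
 * GGTT. "obj" stands for an already populated drm_i915_gem_object and
 * "ggtt" for &dev_priv->ggtt; error handling and the drm_mm node allocation
 * that the higher-level pinning code performs before i915_vma_bind() are
 * omitted, so treat this as call order, not copy-and-paste code.
 *
 *	struct i915_vma *vma;
 *	void __iomem *ptr;
 *
 *	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
 *	i915_vma_bind(vma, obj->cache_level, PIN_GLOBAL);
 *	ptr = i915_vma_pin_iomap(vma);
 *	... CPU writes through ptr, GPU access via the GGTT offset ...
 *	i915_vma_unpin(vma);
 *
 * The NULL view selects the normal GGTT view; a rotated or partial view
 * instead routes i915_get_ggtt_vma_pages() through
 * intel_rotate_fb_obj_pages() or intel_partial_pages(). i915_vma_bind()
 * writes the PTEs through ggtt->base.bind_vma (ggtt_bind_vma, or
 * aliasing_gtt_bind_vma when an aliasing PPGTT is in use), and
 * i915_vma_pin_iomap() returns a WC mapping of the node through the
 * mappable aperture.
 */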