/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <linux/stop_machine.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing
 * pages in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * differ from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
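 *
 * As an illustrative sketch of the caller side (the pin helper named below is
 * only an example of the view-taking flavour, not a prescribed entry point;
 * a real rotated view would also fill in its per-view metadata, see step 2):
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *
 *	i915_gem_object_ggtt_pin(obj, &view, alignment, flags);
 *
 * Callers only interested in the normal view can pass &i915_ggtt_view_normal
 * instead, and thanks to the copy semantics described below the view
 * descriptor may simply live on the caller's stack.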
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages() function. This table is stored in the
 * vma.ggtt_view and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that a
 * passed-in struct i915_ggtt_view does not need to be persistent (left around
 * after calling the core API functions).
 *
 */

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal;
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED
};

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;

	if (intel_vgpu_active(dev))
		has_full_ppgtt = false; /* emulation is too hard */

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ?
_PAGE_PRESENT | _PAGE_RW : 0; 183 pte |= addr; 184 185 switch (level) { 186 case I915_CACHE_NONE: 187 pte |= PPAT_UNCACHED_INDEX; 188 break; 189 case I915_CACHE_WT: 190 pte |= PPAT_DISPLAY_ELLC_INDEX; 191 break; 192 default: 193 pte |= PPAT_CACHED_INDEX; 194 break; 195 } 196 197 return pte; 198 } 199 200 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr, 201 const enum i915_cache_level level) 202 { 203 gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 204 pde |= addr; 205 if (level != I915_CACHE_NONE) 206 pde |= PPAT_CACHED_PDE_INDEX; 207 else 208 pde |= PPAT_UNCACHED_INDEX; 209 return pde; 210 } 211 212 #define gen8_pdpe_encode gen8_pde_encode 213 #define gen8_pml4e_encode gen8_pde_encode 214 215 static gen6_pte_t snb_pte_encode(dma_addr_t addr, 216 enum i915_cache_level level, 217 bool valid, u32 unused) 218 { 219 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 220 pte |= GEN6_PTE_ADDR_ENCODE(addr); 221 222 switch (level) { 223 case I915_CACHE_L3_LLC: 224 case I915_CACHE_LLC: 225 pte |= GEN6_PTE_CACHE_LLC; 226 break; 227 case I915_CACHE_NONE: 228 pte |= GEN6_PTE_UNCACHED; 229 break; 230 default: 231 MISSING_CASE(level); 232 } 233 234 return pte; 235 } 236 237 static gen6_pte_t ivb_pte_encode(dma_addr_t addr, 238 enum i915_cache_level level, 239 bool valid, u32 unused) 240 { 241 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 242 pte |= GEN6_PTE_ADDR_ENCODE(addr); 243 244 switch (level) { 245 case I915_CACHE_L3_LLC: 246 pte |= GEN7_PTE_CACHE_L3_LLC; 247 break; 248 case I915_CACHE_LLC: 249 pte |= GEN6_PTE_CACHE_LLC; 250 break; 251 case I915_CACHE_NONE: 252 pte |= GEN6_PTE_UNCACHED; 253 break; 254 default: 255 MISSING_CASE(level); 256 } 257 258 return pte; 259 } 260 261 static gen6_pte_t byt_pte_encode(dma_addr_t addr, 262 enum i915_cache_level level, 263 bool valid, u32 flags) 264 { 265 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 266 pte |= GEN6_PTE_ADDR_ENCODE(addr); 267 268 if (!(flags & PTE_READ_ONLY)) 269 pte |= BYT_PTE_WRITEABLE; 270 271 if (level != I915_CACHE_NONE) 272 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 273 274 return pte; 275 } 276 277 static gen6_pte_t hsw_pte_encode(dma_addr_t addr, 278 enum i915_cache_level level, 279 bool valid, u32 unused) 280 { 281 gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0; 282 pte |= HSW_PTE_ADDR_ENCODE(addr); 283 284 if (level != I915_CACHE_NONE) 285 pte |= HSW_WB_LLC_AGE3; 286 287 return pte; 288 } 289 290 static gen6_pte_t iris_pte_encode(dma_addr_t addr, 291 enum i915_cache_level level, 292 bool valid, u32 unused) 293 { 294 gen6_pte_t pte = valid ? 
GEN6_PTE_VALID : 0; 295 pte |= HSW_PTE_ADDR_ENCODE(addr); 296 297 switch (level) { 298 case I915_CACHE_NONE: 299 break; 300 case I915_CACHE_WT: 301 pte |= HSW_WT_ELLC_LLC_AGE3; 302 break; 303 default: 304 pte |= HSW_WB_ELLC_LLC_AGE3; 305 break; 306 } 307 308 return pte; 309 } 310 311 static int __setup_page_dma(struct drm_device *dev, 312 struct i915_page_dma *p, gfp_t flags) 313 { 314 struct device *device = &dev->pdev->dev; 315 316 p->page = alloc_page(flags); 317 if (!p->page) 318 return -ENOMEM; 319 320 p->daddr = dma_map_page(device, 321 p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); 322 323 if (dma_mapping_error(device, p->daddr)) { 324 __free_page(p->page); 325 return -EINVAL; 326 } 327 328 return 0; 329 } 330 331 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 332 { 333 return __setup_page_dma(dev, p, GFP_KERNEL); 334 } 335 336 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p) 337 { 338 if (WARN_ON(!p->page)) 339 return; 340 341 dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL); 342 __free_page(p->page); 343 memset(p, 0, sizeof(*p)); 344 } 345 346 static void *kmap_page_dma(struct i915_page_dma *p) 347 { 348 return kmap_atomic(p->page); 349 } 350 351 /* We use the flushing unmap only with ppgtt structures: 352 * page directories, page tables and scratch pages. 353 */ 354 static void kunmap_page_dma(struct drm_device *dev, void *vaddr) 355 { 356 /* There are only few exceptions for gen >=6. chv and bxt. 357 * And we are not sure about the latter so play safe for now. 358 */ 359 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 360 drm_clflush_virt_range(vaddr, PAGE_SIZE); 361 362 kunmap_atomic(vaddr); 363 } 364 365 #define kmap_px(px) kmap_page_dma(px_base(px)) 366 #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr)) 367 368 #define setup_px(dev, px) setup_page_dma((dev), px_base(px)) 369 #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px)) 370 #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v)) 371 #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v)) 372 373 static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p, 374 const uint64_t val) 375 { 376 int i; 377 uint64_t * const vaddr = kmap_page_dma(p); 378 379 for (i = 0; i < 512; i++) 380 vaddr[i] = val; 381 382 kunmap_page_dma(dev, vaddr); 383 } 384 385 static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p, 386 const uint32_t val32) 387 { 388 uint64_t v = val32; 389 390 v = v << 32 | val32; 391 392 fill_page_dma(dev, p, v); 393 } 394 395 static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev) 396 { 397 struct i915_page_scratch *sp; 398 int ret; 399 400 sp = kzalloc(sizeof(*sp), GFP_KERNEL); 401 if (sp == NULL) 402 return ERR_PTR(-ENOMEM); 403 404 ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO); 405 if (ret) { 406 kfree(sp); 407 return ERR_PTR(ret); 408 } 409 410 set_pages_uc(px_page(sp), 1); 411 412 return sp; 413 } 414 415 static void free_scratch_page(struct drm_device *dev, 416 struct i915_page_scratch *sp) 417 { 418 set_pages_wb(px_page(sp), 1); 419 420 cleanup_px(dev, sp); 421 kfree(sp); 422 } 423 424 static struct i915_page_table *alloc_pt(struct drm_device *dev) 425 { 426 struct i915_page_table *pt; 427 const size_t count = INTEL_INFO(dev)->gen >= 8 ? 
428 GEN8_PTES : GEN6_PTES; 429 int ret = -ENOMEM; 430 431 pt = kzalloc(sizeof(*pt), GFP_KERNEL); 432 if (!pt) 433 return ERR_PTR(-ENOMEM); 434 435 pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), 436 GFP_KERNEL); 437 438 if (!pt->used_ptes) 439 goto fail_bitmap; 440 441 ret = setup_px(dev, pt); 442 if (ret) 443 goto fail_page_m; 444 445 return pt; 446 447 fail_page_m: 448 kfree(pt->used_ptes); 449 fail_bitmap: 450 kfree(pt); 451 452 return ERR_PTR(ret); 453 } 454 455 static void free_pt(struct drm_device *dev, struct i915_page_table *pt) 456 { 457 cleanup_px(dev, pt); 458 kfree(pt->used_ptes); 459 kfree(pt); 460 } 461 462 static void gen8_initialize_pt(struct i915_address_space *vm, 463 struct i915_page_table *pt) 464 { 465 gen8_pte_t scratch_pte; 466 467 scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 468 I915_CACHE_LLC, true); 469 470 fill_px(vm->dev, pt, scratch_pte); 471 } 472 473 static void gen6_initialize_pt(struct i915_address_space *vm, 474 struct i915_page_table *pt) 475 { 476 gen6_pte_t scratch_pte; 477 478 WARN_ON(px_dma(vm->scratch_page) == 0); 479 480 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 481 I915_CACHE_LLC, true, 0); 482 483 fill32_px(vm->dev, pt, scratch_pte); 484 } 485 486 static struct i915_page_directory *alloc_pd(struct drm_device *dev) 487 { 488 struct i915_page_directory *pd; 489 int ret = -ENOMEM; 490 491 pd = kzalloc(sizeof(*pd), GFP_KERNEL); 492 if (!pd) 493 return ERR_PTR(-ENOMEM); 494 495 pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), 496 sizeof(*pd->used_pdes), GFP_KERNEL); 497 if (!pd->used_pdes) 498 goto fail_bitmap; 499 500 ret = setup_px(dev, pd); 501 if (ret) 502 goto fail_page_m; 503 504 return pd; 505 506 fail_page_m: 507 kfree(pd->used_pdes); 508 fail_bitmap: 509 kfree(pd); 510 511 return ERR_PTR(ret); 512 } 513 514 static void free_pd(struct drm_device *dev, struct i915_page_directory *pd) 515 { 516 if (px_page(pd)) { 517 cleanup_px(dev, pd); 518 kfree(pd->used_pdes); 519 kfree(pd); 520 } 521 } 522 523 static void gen8_initialize_pd(struct i915_address_space *vm, 524 struct i915_page_directory *pd) 525 { 526 gen8_pde_t scratch_pde; 527 528 scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); 529 530 fill_px(vm->dev, pd, scratch_pde); 531 } 532 533 static int __pdp_init(struct drm_device *dev, 534 struct i915_page_directory_pointer *pdp) 535 { 536 size_t pdpes = I915_PDPES_PER_PDP(dev); 537 538 pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), 539 sizeof(unsigned long), 540 GFP_KERNEL); 541 if (!pdp->used_pdpes) 542 return -ENOMEM; 543 544 pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), 545 GFP_KERNEL); 546 if (!pdp->page_directory) { 547 kfree(pdp->used_pdpes); 548 /* the PDP might be the statically allocated top level. 
Keep it 549 * as clean as possible */ 550 pdp->used_pdpes = NULL; 551 return -ENOMEM; 552 } 553 554 return 0; 555 } 556 557 static void __pdp_fini(struct i915_page_directory_pointer *pdp) 558 { 559 kfree(pdp->used_pdpes); 560 kfree(pdp->page_directory); 561 pdp->page_directory = NULL; 562 } 563 564 static struct 565 i915_page_directory_pointer *alloc_pdp(struct drm_device *dev) 566 { 567 struct i915_page_directory_pointer *pdp; 568 int ret = -ENOMEM; 569 570 WARN_ON(!USES_FULL_48BIT_PPGTT(dev)); 571 572 pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); 573 if (!pdp) 574 return ERR_PTR(-ENOMEM); 575 576 ret = __pdp_init(dev, pdp); 577 if (ret) 578 goto fail_bitmap; 579 580 ret = setup_px(dev, pdp); 581 if (ret) 582 goto fail_page_m; 583 584 return pdp; 585 586 fail_page_m: 587 __pdp_fini(pdp); 588 fail_bitmap: 589 kfree(pdp); 590 591 return ERR_PTR(ret); 592 } 593 594 static void free_pdp(struct drm_device *dev, 595 struct i915_page_directory_pointer *pdp) 596 { 597 __pdp_fini(pdp); 598 if (USES_FULL_48BIT_PPGTT(dev)) { 599 cleanup_px(dev, pdp); 600 kfree(pdp); 601 } 602 } 603 604 static void gen8_initialize_pdp(struct i915_address_space *vm, 605 struct i915_page_directory_pointer *pdp) 606 { 607 gen8_ppgtt_pdpe_t scratch_pdpe; 608 609 scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); 610 611 fill_px(vm->dev, pdp, scratch_pdpe); 612 } 613 614 static void gen8_initialize_pml4(struct i915_address_space *vm, 615 struct i915_pml4 *pml4) 616 { 617 gen8_ppgtt_pml4e_t scratch_pml4e; 618 619 scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), 620 I915_CACHE_LLC); 621 622 fill_px(vm->dev, pml4, scratch_pml4e); 623 } 624 625 static void 626 gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt, 627 struct i915_page_directory_pointer *pdp, 628 struct i915_page_directory *pd, 629 int index) 630 { 631 gen8_ppgtt_pdpe_t *page_directorypo; 632 633 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 634 return; 635 636 page_directorypo = kmap_px(pdp); 637 page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); 638 kunmap_px(ppgtt, page_directorypo); 639 } 640 641 static void 642 gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt, 643 struct i915_pml4 *pml4, 644 struct i915_page_directory_pointer *pdp, 645 int index) 646 { 647 gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); 648 649 WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)); 650 pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); 651 kunmap_px(ppgtt, pagemap); 652 } 653 654 /* Broadwell Page Directory Pointer Descriptors */ 655 static int gen8_write_pdp(struct drm_i915_gem_request *req, 656 unsigned entry, 657 dma_addr_t addr) 658 { 659 struct intel_engine_cs *ring = req->ring; 660 int ret; 661 662 BUG_ON(entry >= 4); 663 664 ret = intel_ring_begin(req, 6); 665 if (ret) 666 return ret; 667 668 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 669 intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry)); 670 intel_ring_emit(ring, upper_32_bits(addr)); 671 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 672 intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry)); 673 intel_ring_emit(ring, lower_32_bits(addr)); 674 intel_ring_advance(ring); 675 676 return 0; 677 } 678 679 static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, 680 struct drm_i915_gem_request *req) 681 { 682 int i, ret; 683 684 for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { 685 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); 686 687 ret = gen8_write_pdp(req, i, pd_daddr); 688 if (ret) 689 return ret; 690 } 691 692 
return 0; 693 } 694 695 static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, 696 struct drm_i915_gem_request *req) 697 { 698 return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); 699 } 700 701 static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm, 702 struct i915_page_directory_pointer *pdp, 703 uint64_t start, 704 uint64_t length, 705 gen8_pte_t scratch_pte) 706 { 707 struct i915_hw_ppgtt *ppgtt = 708 container_of(vm, struct i915_hw_ppgtt, base); 709 gen8_pte_t *pt_vaddr; 710 unsigned pdpe = gen8_pdpe_index(start); 711 unsigned pde = gen8_pde_index(start); 712 unsigned pte = gen8_pte_index(start); 713 unsigned num_entries = length >> PAGE_SHIFT; 714 unsigned last_pte, i; 715 716 if (WARN_ON(!pdp)) 717 return; 718 719 while (num_entries) { 720 struct i915_page_directory *pd; 721 struct i915_page_table *pt; 722 723 if (WARN_ON(!pdp->page_directory[pdpe])) 724 break; 725 726 pd = pdp->page_directory[pdpe]; 727 728 if (WARN_ON(!pd->page_table[pde])) 729 break; 730 731 pt = pd->page_table[pde]; 732 733 if (WARN_ON(!px_page(pt))) 734 break; 735 736 last_pte = pte + num_entries; 737 if (last_pte > GEN8_PTES) 738 last_pte = GEN8_PTES; 739 740 pt_vaddr = kmap_px(pt); 741 742 for (i = pte; i < last_pte; i++) { 743 pt_vaddr[i] = scratch_pte; 744 num_entries--; 745 } 746 747 kunmap_px(ppgtt, pt); 748 749 pte = 0; 750 if (++pde == I915_PDES) { 751 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 752 break; 753 pde = 0; 754 } 755 } 756 } 757 758 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 759 uint64_t start, 760 uint64_t length, 761 bool use_scratch) 762 { 763 struct i915_hw_ppgtt *ppgtt = 764 container_of(vm, struct i915_hw_ppgtt, base); 765 gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page), 766 I915_CACHE_LLC, use_scratch); 767 768 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 769 gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length, 770 scratch_pte); 771 } else { 772 uint64_t pml4e; 773 struct i915_page_directory_pointer *pdp; 774 775 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 776 gen8_ppgtt_clear_pte_range(vm, pdp, start, length, 777 scratch_pte); 778 } 779 } 780 } 781 782 static void 783 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, 784 struct i915_page_directory_pointer *pdp, 785 struct sg_page_iter *sg_iter, 786 uint64_t start, 787 enum i915_cache_level cache_level) 788 { 789 struct i915_hw_ppgtt *ppgtt = 790 container_of(vm, struct i915_hw_ppgtt, base); 791 gen8_pte_t *pt_vaddr; 792 unsigned pdpe = gen8_pdpe_index(start); 793 unsigned pde = gen8_pde_index(start); 794 unsigned pte = gen8_pte_index(start); 795 796 pt_vaddr = NULL; 797 798 while (__sg_page_iter_next(sg_iter)) { 799 if (pt_vaddr == NULL) { 800 struct i915_page_directory *pd = pdp->page_directory[pdpe]; 801 struct i915_page_table *pt = pd->page_table[pde]; 802 pt_vaddr = kmap_px(pt); 803 } 804 805 pt_vaddr[pte] = 806 gen8_pte_encode(sg_page_iter_dma_address(sg_iter), 807 cache_level, true); 808 if (++pte == GEN8_PTES) { 809 kunmap_px(ppgtt, pt_vaddr); 810 pt_vaddr = NULL; 811 if (++pde == I915_PDES) { 812 if (++pdpe == I915_PDPES_PER_PDP(vm->dev)) 813 break; 814 pde = 0; 815 } 816 pte = 0; 817 } 818 } 819 820 if (pt_vaddr) 821 kunmap_px(ppgtt, pt_vaddr); 822 } 823 824 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 825 struct sg_table *pages, 826 uint64_t start, 827 enum i915_cache_level cache_level, 828 u32 unused) 829 { 830 struct i915_hw_ppgtt *ppgtt = 831 container_of(vm, struct i915_hw_ppgtt, base); 832 struct sg_page_iter 
sg_iter; 833 834 __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); 835 836 if (!USES_FULL_48BIT_PPGTT(vm->dev)) { 837 gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, 838 cache_level); 839 } else { 840 struct i915_page_directory_pointer *pdp; 841 uint64_t pml4e; 842 uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; 843 844 gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { 845 gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, 846 start, cache_level); 847 } 848 } 849 } 850 851 static void gen8_free_page_tables(struct drm_device *dev, 852 struct i915_page_directory *pd) 853 { 854 int i; 855 856 if (!px_page(pd)) 857 return; 858 859 for_each_set_bit(i, pd->used_pdes, I915_PDES) { 860 if (WARN_ON(!pd->page_table[i])) 861 continue; 862 863 free_pt(dev, pd->page_table[i]); 864 pd->page_table[i] = NULL; 865 } 866 } 867 868 static int gen8_init_scratch(struct i915_address_space *vm) 869 { 870 struct drm_device *dev = vm->dev; 871 872 vm->scratch_page = alloc_scratch_page(dev); 873 if (IS_ERR(vm->scratch_page)) 874 return PTR_ERR(vm->scratch_page); 875 876 vm->scratch_pt = alloc_pt(dev); 877 if (IS_ERR(vm->scratch_pt)) { 878 free_scratch_page(dev, vm->scratch_page); 879 return PTR_ERR(vm->scratch_pt); 880 } 881 882 vm->scratch_pd = alloc_pd(dev); 883 if (IS_ERR(vm->scratch_pd)) { 884 free_pt(dev, vm->scratch_pt); 885 free_scratch_page(dev, vm->scratch_page); 886 return PTR_ERR(vm->scratch_pd); 887 } 888 889 if (USES_FULL_48BIT_PPGTT(dev)) { 890 vm->scratch_pdp = alloc_pdp(dev); 891 if (IS_ERR(vm->scratch_pdp)) { 892 free_pd(dev, vm->scratch_pd); 893 free_pt(dev, vm->scratch_pt); 894 free_scratch_page(dev, vm->scratch_page); 895 return PTR_ERR(vm->scratch_pdp); 896 } 897 } 898 899 gen8_initialize_pt(vm, vm->scratch_pt); 900 gen8_initialize_pd(vm, vm->scratch_pd); 901 if (USES_FULL_48BIT_PPGTT(dev)) 902 gen8_initialize_pdp(vm, vm->scratch_pdp); 903 904 return 0; 905 } 906 907 static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) 908 { 909 enum vgt_g2v_type msg; 910 struct drm_device *dev = ppgtt->base.dev; 911 struct drm_i915_private *dev_priv = dev->dev_private; 912 int i; 913 914 if (USES_FULL_48BIT_PPGTT(dev)) { 915 u64 daddr = px_dma(&ppgtt->pml4); 916 917 I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 918 I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 919 920 msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 921 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); 922 } else { 923 for (i = 0; i < GEN8_LEGACY_PDPES; i++) { 924 u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 925 926 I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); 927 I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); 928 } 929 930 msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 931 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); 932 } 933 934 I915_WRITE(vgtif_reg(g2v_notify), msg); 935 936 return 0; 937 } 938 939 static void gen8_free_scratch(struct i915_address_space *vm) 940 { 941 struct drm_device *dev = vm->dev; 942 943 if (USES_FULL_48BIT_PPGTT(dev)) 944 free_pdp(dev, vm->scratch_pdp); 945 free_pd(dev, vm->scratch_pd); 946 free_pt(dev, vm->scratch_pt); 947 free_scratch_page(dev, vm->scratch_page); 948 } 949 950 static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev, 951 struct i915_page_directory_pointer *pdp) 952 { 953 int i; 954 955 for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { 956 if (WARN_ON(!pdp->page_directory[i])) 957 continue; 958 959 gen8_free_page_tables(dev, pdp->page_directory[i]); 960 free_pd(dev, pdp->page_directory[i]); 961 } 962 963 free_pdp(dev, pdp); 964 } 965 966 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) 967 { 968 int i; 969 970 for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { 971 if (WARN_ON(!ppgtt->pml4.pdps[i])) 972 continue; 973 974 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]); 975 } 976 977 cleanup_px(ppgtt->base.dev, &ppgtt->pml4); 978 } 979 980 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 981 { 982 struct i915_hw_ppgtt *ppgtt = 983 container_of(vm, struct i915_hw_ppgtt, base); 984 985 if (intel_vgpu_active(vm->dev)) 986 gen8_ppgtt_notify_vgt(ppgtt, false); 987 988 if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) 989 gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp); 990 else 991 gen8_ppgtt_cleanup_4lvl(ppgtt); 992 993 gen8_free_scratch(vm); 994 } 995 996 /** 997 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. 998 * @vm: Master vm structure. 999 * @pd: Page directory for this address range. 1000 * @start: Starting virtual address to begin allocations. 1001 * @length: Size of the allocations. 1002 * @new_pts: Bitmap set by function with new allocations. Likely used by the 1003 * caller to free on error. 1004 * 1005 * Allocate the required number of page tables. Extremely similar to 1006 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by 1007 * the page directory boundary (instead of the page directory pointer). That 1008 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is 1009 * possible, and likely that the caller will need to use multiple calls of this 1010 * function to achieve the appropriate allocation. 1011 * 1012 * Return: 0 if success; negative error code otherwise. 
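 *
 * (For reference, the 1GB boundary follows directly from the gen8 layout with
 * 4KiB pages: one page directory maps 512 PDEs * 512 PTEs * 4096 bytes =
 * 1GB of virtual address space.)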
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		/* Don't reallocate page tables */
		if (test_bit(pde, pd->used_pdes)) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == vm->scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(vm, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @vm: Master vm structure.
 * @pdp: Page directory pointer for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pds: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directories starting at the pde index of
 * @start, and ending at the pde index @start + @length. This function will skip
 * over already allocated page directories within the range, and only allocate
 * new ones, setting the appropriate pointer within the pdp as well as the
 * correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
				  struct i915_page_directory_pointer *pdp,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pds)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);

	WARN_ON(!bitmap_empty(new_pds, pdpes));

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (test_bit(pdpe, pdp->used_pdpes))
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(vm, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm: Master vm structure.
 * @pml4: Page map level 4 for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
1119 * 1120 * Allocate the required number of page directory pointers. Extremely similar to 1121 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). 1122 * The main difference is here we are limited by the pml4 boundary (instead of 1123 * the page directory pointer). 1124 * 1125 * Return: 0 if success; negative error code otherwise. 1126 */ 1127 static int 1128 gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, 1129 struct i915_pml4 *pml4, 1130 uint64_t start, 1131 uint64_t length, 1132 unsigned long *new_pdps) 1133 { 1134 struct drm_device *dev = vm->dev; 1135 struct i915_page_directory_pointer *pdp; 1136 uint32_t pml4e; 1137 1138 WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); 1139 1140 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1141 if (!test_bit(pml4e, pml4->used_pml4es)) { 1142 pdp = alloc_pdp(dev); 1143 if (IS_ERR(pdp)) 1144 goto unwind_out; 1145 1146 gen8_initialize_pdp(vm, pdp); 1147 pml4->pdps[pml4e] = pdp; 1148 __set_bit(pml4e, new_pdps); 1149 trace_i915_page_directory_pointer_entry_alloc(vm, 1150 pml4e, 1151 start, 1152 GEN8_PML4E_SHIFT); 1153 } 1154 } 1155 1156 return 0; 1157 1158 unwind_out: 1159 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1160 free_pdp(dev, pml4->pdps[pml4e]); 1161 1162 return -ENOMEM; 1163 } 1164 1165 static void 1166 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) 1167 { 1168 kfree(new_pts); 1169 kfree(new_pds); 1170 } 1171 1172 /* Fills in the page directory bitmap, and the array of page tables bitmap. Both 1173 * of these are based on the number of PDPEs in the system. 1174 */ 1175 static 1176 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, 1177 unsigned long **new_pts, 1178 uint32_t pdpes) 1179 { 1180 unsigned long *pds; 1181 unsigned long *pts; 1182 1183 pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); 1184 if (!pds) 1185 return -ENOMEM; 1186 1187 pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), 1188 GFP_TEMPORARY); 1189 if (!pts) 1190 goto err_out; 1191 1192 *new_pds = pds; 1193 *new_pts = pts; 1194 1195 return 0; 1196 1197 err_out: 1198 free_gen8_temp_bitmaps(pds, pts); 1199 return -ENOMEM; 1200 } 1201 1202 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify 1203 * the page table structures, we mark them dirty so that 1204 * context switching/execlist queuing code takes extra steps 1205 * to ensure that tlbs are flushed. 1206 */ 1207 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) 1208 { 1209 ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; 1210 } 1211 1212 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, 1213 struct i915_page_directory_pointer *pdp, 1214 uint64_t start, 1215 uint64_t length) 1216 { 1217 struct i915_hw_ppgtt *ppgtt = 1218 container_of(vm, struct i915_hw_ppgtt, base); 1219 unsigned long *new_page_dirs, *new_page_tables; 1220 struct drm_device *dev = vm->dev; 1221 struct i915_page_directory *pd; 1222 const uint64_t orig_start = start; 1223 const uint64_t orig_length = length; 1224 uint32_t pdpe; 1225 uint32_t pdpes = I915_PDPES_PER_PDP(dev); 1226 int ret; 1227 1228 /* Wrap is never okay since we can only represent 48b, and we don't 1229 * actually use the other side of the canonical address space. 
1230 */ 1231 if (WARN_ON(start + length < start)) 1232 return -ENODEV; 1233 1234 if (WARN_ON(start + length > vm->total)) 1235 return -ENODEV; 1236 1237 ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); 1238 if (ret) 1239 return ret; 1240 1241 /* Do the allocations first so we can easily bail out */ 1242 ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, 1243 new_page_dirs); 1244 if (ret) { 1245 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1246 return ret; 1247 } 1248 1249 /* For every page directory referenced, allocate page tables */ 1250 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1251 ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, 1252 new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); 1253 if (ret) 1254 goto err_out; 1255 } 1256 1257 start = orig_start; 1258 length = orig_length; 1259 1260 /* Allocations have completed successfully, so set the bitmaps, and do 1261 * the mappings. */ 1262 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1263 gen8_pde_t *const page_directory = kmap_px(pd); 1264 struct i915_page_table *pt; 1265 uint64_t pd_len = length; 1266 uint64_t pd_start = start; 1267 uint32_t pde; 1268 1269 /* Every pd should be allocated, we just did that above. */ 1270 WARN_ON(!pd); 1271 1272 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1273 /* Same reasoning as pd */ 1274 WARN_ON(!pt); 1275 WARN_ON(!pd_len); 1276 WARN_ON(!gen8_pte_count(pd_start, pd_len)); 1277 1278 /* Set our used ptes within the page table */ 1279 bitmap_set(pt->used_ptes, 1280 gen8_pte_index(pd_start), 1281 gen8_pte_count(pd_start, pd_len)); 1282 1283 /* Our pde is now pointing to the pagetable, pt */ 1284 __set_bit(pde, pd->used_pdes); 1285 1286 /* Map the PDE to the page table */ 1287 page_directory[pde] = gen8_pde_encode(px_dma(pt), 1288 I915_CACHE_LLC); 1289 trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, 1290 gen8_pte_index(start), 1291 gen8_pte_count(start, length), 1292 GEN8_PTES); 1293 1294 /* NB: We haven't yet mapped ptes to pages. At this 1295 * point we're still relying on insert_entries() */ 1296 } 1297 1298 kunmap_px(ppgtt, page_directory); 1299 __set_bit(pdpe, pdp->used_pdpes); 1300 gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); 1301 } 1302 1303 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1304 mark_tlbs_dirty(ppgtt); 1305 return 0; 1306 1307 err_out: 1308 while (pdpe--) { 1309 unsigned long temp; 1310 1311 for_each_set_bit(temp, new_page_tables + pdpe * 1312 BITS_TO_LONGS(I915_PDES), I915_PDES) 1313 free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); 1314 } 1315 1316 for_each_set_bit(pdpe, new_page_dirs, pdpes) 1317 free_pd(dev, pdp->page_directory[pdpe]); 1318 1319 free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); 1320 mark_tlbs_dirty(ppgtt); 1321 return ret; 1322 } 1323 1324 static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, 1325 struct i915_pml4 *pml4, 1326 uint64_t start, 1327 uint64_t length) 1328 { 1329 DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); 1330 struct i915_hw_ppgtt *ppgtt = 1331 container_of(vm, struct i915_hw_ppgtt, base); 1332 struct i915_page_directory_pointer *pdp; 1333 uint64_t pml4e; 1334 int ret = 0; 1335 1336 /* Do the pml4 allocations first, so we don't need to track the newly 1337 * allocated tables below the pdp */ 1338 bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); 1339 1340 /* The pagedirectory and pagetable allocations are done in the shared 3 1341 * and 4 level code. Just allocate the pdps. 
1342 */ 1343 ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, 1344 new_pdps); 1345 if (ret) 1346 return ret; 1347 1348 WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, 1349 "The allocation has spanned more than 512GB. " 1350 "It is highly likely this is incorrect."); 1351 1352 gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { 1353 WARN_ON(!pdp); 1354 1355 ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); 1356 if (ret) 1357 goto err_out; 1358 1359 gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e); 1360 } 1361 1362 bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, 1363 GEN8_PML4ES_PER_PML4); 1364 1365 return 0; 1366 1367 err_out: 1368 for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) 1369 gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]); 1370 1371 return ret; 1372 } 1373 1374 static int gen8_alloc_va_range(struct i915_address_space *vm, 1375 uint64_t start, uint64_t length) 1376 { 1377 struct i915_hw_ppgtt *ppgtt = 1378 container_of(vm, struct i915_hw_ppgtt, base); 1379 1380 if (USES_FULL_48BIT_PPGTT(vm->dev)) 1381 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); 1382 else 1383 return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); 1384 } 1385 1386 static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, 1387 uint64_t start, uint64_t length, 1388 gen8_pte_t scratch_pte, 1389 struct seq_file *m) 1390 { 1391 struct i915_page_directory *pd; 1392 uint32_t pdpe; 1393 1394 gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { 1395 struct i915_page_table *pt; 1396 uint64_t pd_len = length; 1397 uint64_t pd_start = start; 1398 uint32_t pde; 1399 1400 if (!test_bit(pdpe, pdp->used_pdpes)) 1401 continue; 1402 1403 seq_printf(m, "\tPDPE #%d\n", pdpe); 1404 gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { 1405 uint32_t pte; 1406 gen8_pte_t *pt_vaddr; 1407 1408 if (!test_bit(pde, pd->used_pdes)) 1409 continue; 1410 1411 pt_vaddr = kmap_px(pt); 1412 for (pte = 0; pte < GEN8_PTES; pte += 4) { 1413 uint64_t va = 1414 (pdpe << GEN8_PDPE_SHIFT) | 1415 (pde << GEN8_PDE_SHIFT) | 1416 (pte << GEN8_PTE_SHIFT); 1417 int i; 1418 bool found = false; 1419 1420 for (i = 0; i < 4; i++) 1421 if (pt_vaddr[pte + i] != scratch_pte) 1422 found = true; 1423 if (!found) 1424 continue; 1425 1426 seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte); 1427 for (i = 0; i < 4; i++) { 1428 if (pt_vaddr[pte + i] != scratch_pte) 1429 seq_printf(m, " %llx", pt_vaddr[pte + i]); 1430 else 1431 seq_puts(m, " SCRATCH "); 1432 } 1433 seq_puts(m, "\n"); 1434 } 1435 /* don't use kunmap_px, it could trigger 1436 * an unnecessary flush. 
			 */
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	uint64_t start = ppgtt->base.start;
	uint64_t length = ppgtt->base.total;
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
						 I915_CACHE_LLC, true);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
	} else {
		uint64_t pml4e;
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
			if (!test_bit(pml4e, pml4->used_pml4es))
				continue;

			seq_printf(m, " PML4E #%llu\n", pml4e);
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
		}
	}
}

static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{
	unsigned long *new_page_dirs, *new_page_tables;
	uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
	int ret;

	/* We allocate a temp bitmap for the page tables for no gain,
	 * but as this is for init only, let's keep things simple.
	 */
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Allocate for all pdps regardless of how the ppgtt
	 * was defined.
	 */
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
						0, 1ULL << 32,
						new_page_dirs);
	if (!ret)
		*ppgtt->pdp.used_pdpes = *new_page_dirs;

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);

	return ret;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP register covers 1GB of memory: 4 * 512 * 512 * 4096 = 4GB
 * of legacy 32-bit address space.
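 *
 * With full 48-bit PPGTT a fourth level (the PML4) is added on top, so the
 * same arithmetic becomes 512 * 512 * 512 * 512 * 4096 = 2^48 bytes, which is
 * why gen8_ppgtt_init() below sets base.total to 1ULL << 48 in that mode.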
1500 * 1501 */ 1502 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1503 { 1504 int ret; 1505 1506 ret = gen8_init_scratch(&ppgtt->base); 1507 if (ret) 1508 return ret; 1509 1510 ppgtt->base.start = 0; 1511 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 1512 ppgtt->base.allocate_va_range = gen8_alloc_va_range; 1513 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 1514 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 1515 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 1516 ppgtt->base.bind_vma = ppgtt_bind_vma; 1517 ppgtt->debug_dump = gen8_dump_ppgtt; 1518 1519 if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { 1520 ret = setup_px(ppgtt->base.dev, &ppgtt->pml4); 1521 if (ret) 1522 goto free_scratch; 1523 1524 gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); 1525 1526 ppgtt->base.total = 1ULL << 48; 1527 ppgtt->switch_mm = gen8_48b_mm_switch; 1528 } else { 1529 ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp); 1530 if (ret) 1531 goto free_scratch; 1532 1533 ppgtt->base.total = 1ULL << 32; 1534 ppgtt->switch_mm = gen8_legacy_mm_switch; 1535 trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 1536 0, 0, 1537 GEN8_PML4E_SHIFT); 1538 1539 if (intel_vgpu_active(ppgtt->base.dev)) { 1540 ret = gen8_preallocate_top_level_pdps(ppgtt); 1541 if (ret) 1542 goto free_scratch; 1543 } 1544 } 1545 1546 if (intel_vgpu_active(ppgtt->base.dev)) 1547 gen8_ppgtt_notify_vgt(ppgtt, true); 1548 1549 return 0; 1550 1551 free_scratch: 1552 gen8_free_scratch(&ppgtt->base); 1553 return ret; 1554 } 1555 1556 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 1557 { 1558 struct i915_address_space *vm = &ppgtt->base; 1559 struct i915_page_table *unused; 1560 gen6_pte_t scratch_pte; 1561 uint32_t pd_entry; 1562 uint32_t pte, pde, temp; 1563 uint32_t start = ppgtt->base.start, length = ppgtt->base.total; 1564 1565 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1566 I915_CACHE_LLC, true, 0); 1567 1568 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) { 1569 u32 expected; 1570 gen6_pte_t *pt_vaddr; 1571 const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); 1572 pd_entry = readl(ppgtt->pd_addr + pde); 1573 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 1574 1575 if (pd_entry != expected) 1576 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 1577 pde, 1578 pd_entry, 1579 expected); 1580 seq_printf(m, "\tPDE: %x\n", pd_entry); 1581 1582 pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); 1583 1584 for (pte = 0; pte < GEN6_PTES; pte+=4) { 1585 unsigned long va = 1586 (pde * PAGE_SIZE * GEN6_PTES) + 1587 (pte * PAGE_SIZE); 1588 int i; 1589 bool found = false; 1590 for (i = 0; i < 4; i++) 1591 if (pt_vaddr[pte + i] != scratch_pte) 1592 found = true; 1593 if (!found) 1594 continue; 1595 1596 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 1597 for (i = 0; i < 4; i++) { 1598 if (pt_vaddr[pte + i] != scratch_pte) 1599 seq_printf(m, " %08x", pt_vaddr[pte + i]); 1600 else 1601 seq_puts(m, " SCRATCH "); 1602 } 1603 seq_puts(m, "\n"); 1604 } 1605 kunmap_px(ppgtt, pt_vaddr); 1606 } 1607 } 1608 1609 /* Write pde (index) from the page directory @pd to the page table @pt */ 1610 static void gen6_write_pde(struct i915_page_directory *pd, 1611 const int pde, struct i915_page_table *pt) 1612 { 1613 /* Caller needs to make sure the write completes if necessary */ 1614 struct i915_hw_ppgtt *ppgtt = 1615 container_of(pd, struct i915_hw_ppgtt, pd); 1616 u32 pd_entry; 1617 1618 pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); 1619 pd_entry |= 
GEN6_PDE_VALID; 1620 1621 writel(pd_entry, ppgtt->pd_addr + pde); 1622 } 1623 1624 /* Write all the page tables found in the ppgtt structure to incrementing page 1625 * directories. */ 1626 static void gen6_write_page_range(struct drm_i915_private *dev_priv, 1627 struct i915_page_directory *pd, 1628 uint32_t start, uint32_t length) 1629 { 1630 struct i915_page_table *pt; 1631 uint32_t pde, temp; 1632 1633 gen6_for_each_pde(pt, pd, start, length, temp, pde) 1634 gen6_write_pde(pd, pde, pt); 1635 1636 /* Make sure write is complete before other code can use this page 1637 * table. Also require for WC mapped PTEs */ 1638 readl(dev_priv->gtt.gsm); 1639 } 1640 1641 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 1642 { 1643 BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); 1644 1645 return (ppgtt->pd.base.ggtt_offset / 64) << 16; 1646 } 1647 1648 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 1649 struct drm_i915_gem_request *req) 1650 { 1651 struct intel_engine_cs *ring = req->ring; 1652 int ret; 1653 1654 /* NB: TLBs must be flushed and invalidated before a switch */ 1655 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1656 if (ret) 1657 return ret; 1658 1659 ret = intel_ring_begin(req, 6); 1660 if (ret) 1661 return ret; 1662 1663 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1664 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring)); 1665 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1666 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring)); 1667 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1668 intel_ring_emit(ring, MI_NOOP); 1669 intel_ring_advance(ring); 1670 1671 return 0; 1672 } 1673 1674 static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt, 1675 struct drm_i915_gem_request *req) 1676 { 1677 struct intel_engine_cs *ring = req->ring; 1678 struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev); 1679 1680 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 1681 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 1682 return 0; 1683 } 1684 1685 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 1686 struct drm_i915_gem_request *req) 1687 { 1688 struct intel_engine_cs *ring = req->ring; 1689 int ret; 1690 1691 /* NB: TLBs must be flushed and invalidated before a switch */ 1692 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1693 if (ret) 1694 return ret; 1695 1696 ret = intel_ring_begin(req, 6); 1697 if (ret) 1698 return ret; 1699 1700 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 1701 intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring)); 1702 intel_ring_emit(ring, PP_DIR_DCLV_2G); 1703 intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring)); 1704 intel_ring_emit(ring, get_pd_offset(ppgtt)); 1705 intel_ring_emit(ring, MI_NOOP); 1706 intel_ring_advance(ring); 1707 1708 /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ 1709 if (ring->id != RCS) { 1710 ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 1711 if (ret) 1712 return ret; 1713 } 1714 1715 return 0; 1716 } 1717 1718 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 1719 struct drm_i915_gem_request *req) 1720 { 1721 struct intel_engine_cs *ring = req->ring; 1722 struct drm_device *dev = ppgtt->base.dev; 1723 struct drm_i915_private *dev_priv = dev->dev_private; 1724 1725 1726 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 1727 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 1728 1729 POSTING_READ(RING_PP_DIR_DCLV(ring)); 1730 1731 return 0; 1732 } 1733 1734 static void gen8_ppgtt_enable(struct drm_device *dev) 1735 { 1736 struct drm_i915_private *dev_priv = dev->dev_private; 1737 struct intel_engine_cs *ring; 1738 int j; 1739 1740 for_each_ring(ring, dev_priv, j) { 1741 u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0; 1742 I915_WRITE(RING_MODE_GEN7(ring), 1743 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level)); 1744 } 1745 } 1746 1747 static void gen7_ppgtt_enable(struct drm_device *dev) 1748 { 1749 struct drm_i915_private *dev_priv = dev->dev_private; 1750 struct intel_engine_cs *ring; 1751 uint32_t ecochk, ecobits; 1752 int i; 1753 1754 ecobits = I915_READ(GAC_ECO_BITS); 1755 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 1756 1757 ecochk = I915_READ(GAM_ECOCHK); 1758 if (IS_HASWELL(dev)) { 1759 ecochk |= ECOCHK_PPGTT_WB_HSW; 1760 } else { 1761 ecochk |= ECOCHK_PPGTT_LLC_IVB; 1762 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 1763 } 1764 I915_WRITE(GAM_ECOCHK, ecochk); 1765 1766 for_each_ring(ring, dev_priv, i) { 1767 /* GFX_MODE is per-ring on gen7+ */ 1768 I915_WRITE(RING_MODE_GEN7(ring), 1769 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1770 } 1771 } 1772 1773 static void gen6_ppgtt_enable(struct drm_device *dev) 1774 { 1775 struct drm_i915_private *dev_priv = dev->dev_private; 1776 uint32_t ecochk, gab_ctl, ecobits; 1777 1778 ecobits = I915_READ(GAC_ECO_BITS); 1779 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 1780 ECOBITS_PPGTT_CACHE64B); 1781 1782 gab_ctl = I915_READ(GAB_CTL); 1783 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 1784 1785 ecochk = I915_READ(GAM_ECOCHK); 1786 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 1787 1788 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 1789 } 1790 1791 /* PPGTT support for Sandybdrige/Gen6 and later */ 1792 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 1793 uint64_t start, 1794 uint64_t length, 1795 bool use_scratch) 1796 { 1797 struct i915_hw_ppgtt *ppgtt = 1798 container_of(vm, struct i915_hw_ppgtt, base); 1799 gen6_pte_t *pt_vaddr, scratch_pte; 1800 unsigned first_entry = start >> PAGE_SHIFT; 1801 unsigned num_entries = length >> PAGE_SHIFT; 1802 unsigned act_pt = first_entry / GEN6_PTES; 1803 unsigned first_pte = first_entry % GEN6_PTES; 1804 unsigned last_pte, i; 1805 1806 scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), 1807 I915_CACHE_LLC, true, 0); 1808 1809 while (num_entries) { 1810 last_pte = first_pte + num_entries; 1811 if (last_pte > GEN6_PTES) 1812 last_pte = GEN6_PTES; 1813 1814 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1815 1816 for (i = first_pte; i < last_pte; i++) 1817 pt_vaddr[i] = scratch_pte; 1818 1819 kunmap_px(ppgtt, pt_vaddr); 1820 1821 num_entries -= last_pte - first_pte; 1822 first_pte = 0; 1823 act_pt++; 1824 } 1825 } 1826 1827 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 1828 struct sg_table 
*pages, 1829 uint64_t start, 1830 enum i915_cache_level cache_level, u32 flags) 1831 { 1832 struct i915_hw_ppgtt *ppgtt = 1833 container_of(vm, struct i915_hw_ppgtt, base); 1834 gen6_pte_t *pt_vaddr; 1835 unsigned first_entry = start >> PAGE_SHIFT; 1836 unsigned act_pt = first_entry / GEN6_PTES; 1837 unsigned act_pte = first_entry % GEN6_PTES; 1838 struct sg_page_iter sg_iter; 1839 1840 pt_vaddr = NULL; 1841 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 1842 if (pt_vaddr == NULL) 1843 pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); 1844 1845 pt_vaddr[act_pte] = 1846 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 1847 cache_level, true, flags); 1848 1849 if (++act_pte == GEN6_PTES) { 1850 kunmap_px(ppgtt, pt_vaddr); 1851 pt_vaddr = NULL; 1852 act_pt++; 1853 act_pte = 0; 1854 } 1855 } 1856 if (pt_vaddr) 1857 kunmap_px(ppgtt, pt_vaddr); 1858 } 1859 1860 static int gen6_alloc_va_range(struct i915_address_space *vm, 1861 uint64_t start_in, uint64_t length_in) 1862 { 1863 DECLARE_BITMAP(new_page_tables, I915_PDES); 1864 struct drm_device *dev = vm->dev; 1865 struct drm_i915_private *dev_priv = dev->dev_private; 1866 struct i915_hw_ppgtt *ppgtt = 1867 container_of(vm, struct i915_hw_ppgtt, base); 1868 struct i915_page_table *pt; 1869 uint32_t start, length, start_save, length_save; 1870 uint32_t pde, temp; 1871 int ret; 1872 1873 if (WARN_ON(start_in + length_in > ppgtt->base.total)) 1874 return -ENODEV; 1875 1876 start = start_save = start_in; 1877 length = length_save = length_in; 1878 1879 bitmap_zero(new_page_tables, I915_PDES); 1880 1881 /* The allocation is done in two stages so that we can bail out with 1882 * minimal amount of pain. The first stage finds new page tables that 1883 * need allocation. The second stage marks use ptes within the page 1884 * tables. 1885 */ 1886 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1887 if (pt != vm->scratch_pt) { 1888 WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); 1889 continue; 1890 } 1891 1892 /* We've already allocated a page table */ 1893 WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); 1894 1895 pt = alloc_pt(dev); 1896 if (IS_ERR(pt)) { 1897 ret = PTR_ERR(pt); 1898 goto unwind_out; 1899 } 1900 1901 gen6_initialize_pt(vm, pt); 1902 1903 ppgtt->pd.page_table[pde] = pt; 1904 __set_bit(pde, new_page_tables); 1905 trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); 1906 } 1907 1908 start = start_save; 1909 length = length_save; 1910 1911 gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { 1912 DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); 1913 1914 bitmap_zero(tmp_bitmap, GEN6_PTES); 1915 bitmap_set(tmp_bitmap, gen6_pte_index(start), 1916 gen6_pte_count(start, length)); 1917 1918 if (__test_and_clear_bit(pde, new_page_tables)) 1919 gen6_write_pde(&ppgtt->pd, pde, pt); 1920 1921 trace_i915_page_table_entry_map(vm, pde, pt, 1922 gen6_pte_index(start), 1923 gen6_pte_count(start, length), 1924 GEN6_PTES); 1925 bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, 1926 GEN6_PTES); 1927 } 1928 1929 WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); 1930 1931 /* Make sure write is complete before other code can use this page 1932 * table. 
Also require for WC mapped PTEs */ 1933 readl(dev_priv->gtt.gsm); 1934 1935 mark_tlbs_dirty(ppgtt); 1936 return 0; 1937 1938 unwind_out: 1939 for_each_set_bit(pde, new_page_tables, I915_PDES) { 1940 struct i915_page_table *pt = ppgtt->pd.page_table[pde]; 1941 1942 ppgtt->pd.page_table[pde] = vm->scratch_pt; 1943 free_pt(vm->dev, pt); 1944 } 1945 1946 mark_tlbs_dirty(ppgtt); 1947 return ret; 1948 } 1949 1950 static int gen6_init_scratch(struct i915_address_space *vm) 1951 { 1952 struct drm_device *dev = vm->dev; 1953 1954 vm->scratch_page = alloc_scratch_page(dev); 1955 if (IS_ERR(vm->scratch_page)) 1956 return PTR_ERR(vm->scratch_page); 1957 1958 vm->scratch_pt = alloc_pt(dev); 1959 if (IS_ERR(vm->scratch_pt)) { 1960 free_scratch_page(dev, vm->scratch_page); 1961 return PTR_ERR(vm->scratch_pt); 1962 } 1963 1964 gen6_initialize_pt(vm, vm->scratch_pt); 1965 1966 return 0; 1967 } 1968 1969 static void gen6_free_scratch(struct i915_address_space *vm) 1970 { 1971 struct drm_device *dev = vm->dev; 1972 1973 free_pt(dev, vm->scratch_pt); 1974 free_scratch_page(dev, vm->scratch_page); 1975 } 1976 1977 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1978 { 1979 struct i915_hw_ppgtt *ppgtt = 1980 container_of(vm, struct i915_hw_ppgtt, base); 1981 struct i915_page_table *pt; 1982 uint32_t pde; 1983 1984 drm_mm_remove_node(&ppgtt->node); 1985 1986 gen6_for_all_pdes(pt, ppgtt, pde) { 1987 if (pt != vm->scratch_pt) 1988 free_pt(ppgtt->base.dev, pt); 1989 } 1990 1991 gen6_free_scratch(vm); 1992 } 1993 1994 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1995 { 1996 struct i915_address_space *vm = &ppgtt->base; 1997 struct drm_device *dev = ppgtt->base.dev; 1998 struct drm_i915_private *dev_priv = dev->dev_private; 1999 bool retried = false; 2000 int ret; 2001 2002 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 2003 * allocator works in address space sizes, so it's multiplied by page 2004 * size. We allocate at the top of the GTT to avoid fragmentation. 
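 *
 * (Rough arithmetic for that reservation, assuming GEN6_PD_SIZE is defined as
 * I915_PDES * PAGE_SIZE: 512 entries * 4096 bytes = 2MB of GGTT address space
 * is set aside for the page directory, aligned to GEN6_PD_ALIGN.)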
2005 */ 2006 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); 2007 2008 ret = gen6_init_scratch(vm); 2009 if (ret) 2010 return ret; 2011 2012 alloc: 2013 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, 2014 &ppgtt->node, GEN6_PD_SIZE, 2015 GEN6_PD_ALIGN, 0, 2016 0, dev_priv->gtt.base.total, 2017 DRM_MM_TOPDOWN); 2018 if (ret == -ENOSPC && !retried) { 2019 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 2020 GEN6_PD_SIZE, GEN6_PD_ALIGN, 2021 I915_CACHE_NONE, 2022 0, dev_priv->gtt.base.total, 2023 0); 2024 if (ret) 2025 goto err_out; 2026 2027 retried = true; 2028 goto alloc; 2029 } 2030 2031 if (ret) 2032 goto err_out; 2033 2034 2035 if (ppgtt->node.start < dev_priv->gtt.mappable_end) 2036 DRM_DEBUG("Forced to use aperture for PDEs\n"); 2037 2038 return 0; 2039 2040 err_out: 2041 gen6_free_scratch(vm); 2042 return ret; 2043 } 2044 2045 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 2046 { 2047 return gen6_ppgtt_allocate_page_directories(ppgtt); 2048 } 2049 2050 static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, 2051 uint64_t start, uint64_t length) 2052 { 2053 struct i915_page_table *unused; 2054 uint32_t pde, temp; 2055 2056 gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) 2057 ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; 2058 } 2059 2060 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 2061 { 2062 struct drm_device *dev = ppgtt->base.dev; 2063 struct drm_i915_private *dev_priv = dev->dev_private; 2064 int ret; 2065 2066 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; 2067 if (IS_GEN6(dev)) { 2068 ppgtt->switch_mm = gen6_mm_switch; 2069 } else if (IS_HASWELL(dev)) { 2070 ppgtt->switch_mm = hsw_mm_switch; 2071 } else if (IS_GEN7(dev)) { 2072 ppgtt->switch_mm = gen7_mm_switch; 2073 } else 2074 BUG(); 2075 2076 if (intel_vgpu_active(dev)) 2077 ppgtt->switch_mm = vgpu_mm_switch; 2078 2079 ret = gen6_ppgtt_alloc(ppgtt); 2080 if (ret) 2081 return ret; 2082 2083 ppgtt->base.allocate_va_range = gen6_alloc_va_range; 2084 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 2085 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 2086 ppgtt->base.unbind_vma = ppgtt_unbind_vma; 2087 ppgtt->base.bind_vma = ppgtt_bind_vma; 2088 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 2089 ppgtt->base.start = 0; 2090 ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; 2091 ppgtt->debug_dump = gen6_dump_ppgtt; 2092 2093 ppgtt->pd.base.ggtt_offset = 2094 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); 2095 2096 ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm + 2097 ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); 2098 2099 gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); 2100 2101 gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); 2102 2103 DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", 2104 ppgtt->node.size >> 20, 2105 ppgtt->node.start / PAGE_SIZE); 2106 2107 DRM_DEBUG("Adding PPGTT at offset %x\n", 2108 ppgtt->pd.base.ggtt_offset << 10); 2109 2110 return 0; 2111 } 2112 2113 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 2114 { 2115 ppgtt->base.dev = dev; 2116 2117 if (INTEL_INFO(dev)->gen < 8) 2118 return gen6_ppgtt_init(ppgtt); 2119 else 2120 return gen8_ppgtt_init(ppgtt); 2121 } 2122 2123 static void i915_address_space_init(struct i915_address_space *vm, 2124 struct drm_i915_private *dev_priv) 2125 { 2126 drm_mm_init(&vm->mm, vm->start, vm->total); 2127 vm->dev = dev_priv->dev; 2128 INIT_LIST_HEAD(&vm->active_list); 2129 
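	/* Bound VMAs in this address space are tracked on the two lists set up
	 * here: active_list while they still have GPU work outstanding,
	 * inactive_list once they are bound but idle.
	 */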
INIT_LIST_HEAD(&vm->inactive_list);
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
}

int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ret = __hw_ppgtt_init(dev, ppgtt);
	if (ret == 0) {
		kref_init(&ppgtt->ref);
		i915_address_space_init(&ppgtt->base, dev_priv);
	}

	return ret;
}

int i915_ppgtt_init_hw(struct drm_device *dev)
{
	/* In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself. We don't
	 * need to do anything here. */
	if (i915.enable_execlists)
		return 0;

	if (!USES_PPGTT(dev))
		return 0;

	if (IS_GEN6(dev))
		gen6_ppgtt_enable(dev);
	else if (IS_GEN7(dev))
		gen7_ppgtt_enable(dev);
	else if (INTEL_INFO(dev)->gen >= 8)
		gen8_ppgtt_enable(dev);
	else
		MISSING_CASE(INTEL_INFO(dev)->gen);

	return 0;
}

int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
{
	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (i915.enable_execlists)
		return 0;

	if (!ppgtt)
		return 0;

	return ppgtt->switch_mm(ppgtt, req);
}

struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ret = i915_ppgtt_init(dev, ppgtt);
	if (ret) {
		kfree(ppgtt);
		return ERR_PTR(ret);
	}

	ppgtt->file_priv = fpriv;

	trace_i915_ppgtt_create(&ppgtt->base);

	return ppgtt;
}

void i915_ppgtt_release(struct kref *kref)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(kref, struct i915_hw_ppgtt, ref);

	trace_i915_ppgtt_release(&ppgtt->base);

	/* vmas should already be unbound */
	WARN_ON(!list_empty(&ppgtt->base.active_list));
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));

	list_del(&ppgtt->base.global_link);
	drm_mm_takedown(&ppgtt->base.mm);

	ppgtt->base.cleanup(&ppgtt->base);
	kfree(ppgtt);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
2234 */ 2235 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 2236 return true; 2237 #endif 2238 return false; 2239 } 2240 2241 static bool do_idling(struct drm_i915_private *dev_priv) 2242 { 2243 bool ret = dev_priv->mm.interruptible; 2244 2245 if (unlikely(dev_priv->gtt.do_idle_maps)) { 2246 dev_priv->mm.interruptible = false; 2247 if (i915_gpu_idle(dev_priv->dev)) { 2248 DRM_ERROR("Couldn't idle GPU\n"); 2249 /* Wait a bit, in hopes it avoids the hang */ 2250 udelay(10); 2251 } 2252 } 2253 2254 return ret; 2255 } 2256 2257 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 2258 { 2259 if (unlikely(dev_priv->gtt.do_idle_maps)) 2260 dev_priv->mm.interruptible = interruptible; 2261 } 2262 2263 void i915_check_and_clear_faults(struct drm_device *dev) 2264 { 2265 struct drm_i915_private *dev_priv = dev->dev_private; 2266 struct intel_engine_cs *ring; 2267 int i; 2268 2269 if (INTEL_INFO(dev)->gen < 6) 2270 return; 2271 2272 for_each_ring(ring, dev_priv, i) { 2273 u32 fault_reg; 2274 fault_reg = I915_READ(RING_FAULT_REG(ring)); 2275 if (fault_reg & RING_FAULT_VALID) { 2276 DRM_DEBUG_DRIVER("Unexpected fault\n" 2277 "\tAddr: 0x%08lx\n" 2278 "\tAddress space: %s\n" 2279 "\tSource ID: %d\n" 2280 "\tType: %d\n", 2281 fault_reg & PAGE_MASK, 2282 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 2283 RING_FAULT_SRCID(fault_reg), 2284 RING_FAULT_FAULT_TYPE(fault_reg)); 2285 I915_WRITE(RING_FAULT_REG(ring), 2286 fault_reg & ~RING_FAULT_VALID); 2287 } 2288 } 2289 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); 2290 } 2291 2292 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 2293 { 2294 if (INTEL_INFO(dev_priv->dev)->gen < 6) { 2295 intel_gtt_chipset_flush(); 2296 } else { 2297 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 2298 POSTING_READ(GFX_FLSH_CNTL_GEN6); 2299 } 2300 } 2301 2302 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 2303 { 2304 struct drm_i915_private *dev_priv = dev->dev_private; 2305 2306 /* Don't bother messing with faults pre GEN6 as we have little 2307 * documentation supporting that it's a good idea. 
 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_pte_t __iomem *gtt_entries =
		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR they are potentially
	 * subject to NUMA access patterns. Therefore, even with the way we
	 * assume hardware should work, we must keep this posting read for
	 * paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct sg_table *st;
	uint64_t start;
	enum i915_cache_level level;
	u32 flags;
};

static int gen8_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;
	gen8_ggtt_insert_entries(arg->vm, arg->st,
				 arg->start, arg->level, arg->flags);
	return 0;
}

static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					  struct sg_table *st,
					  uint64_t start,
					  enum i915_cache_level level,
					  u32 flags)
{
	struct insert_entries arg = { vm, st, start, level, flags };
	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_pte_t __iomem *gtt_entries =
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR they are potentially
	 * subject to NUMA access patterns. Therefore, even with the way we
	 * assume hardware should work, we must keep this posting read for
	 * paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *pages,
				     uint64_t start,
				     enum i915_cache_level cache_level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_clear_range(first_entry, num_entries);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags = 0;
	int ret;

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
				vma->node.start,
				cache_level, pte_flags);

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
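	 *
	 * (With an aliasing PPGTT the distinction does matter; see
	 * aliasing_gtt_bind_vma() below, which writes GGTT PTEs for
	 * GLOBAL_BIND and aliasing PPGTT PTEs for LOCAL_BIND at the same
	 * vma->node.start offset.)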
2583 */ 2584 vma->bound |= GLOBAL_BIND | LOCAL_BIND; 2585 2586 return 0; 2587 } 2588 2589 static int aliasing_gtt_bind_vma(struct i915_vma *vma, 2590 enum i915_cache_level cache_level, 2591 u32 flags) 2592 { 2593 struct drm_device *dev = vma->vm->dev; 2594 struct drm_i915_private *dev_priv = dev->dev_private; 2595 struct drm_i915_gem_object *obj = vma->obj; 2596 struct sg_table *pages = obj->pages; 2597 u32 pte_flags = 0; 2598 int ret; 2599 2600 ret = i915_get_ggtt_vma_pages(vma); 2601 if (ret) 2602 return ret; 2603 pages = vma->ggtt_view.pages; 2604 2605 /* Currently applicable only to VLV */ 2606 if (obj->gt_ro) 2607 pte_flags |= PTE_READ_ONLY; 2608 2609 2610 if (flags & GLOBAL_BIND) { 2611 vma->vm->insert_entries(vma->vm, pages, 2612 vma->node.start, 2613 cache_level, pte_flags); 2614 } 2615 2616 if (flags & LOCAL_BIND) { 2617 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2618 appgtt->base.insert_entries(&appgtt->base, pages, 2619 vma->node.start, 2620 cache_level, pte_flags); 2621 } 2622 2623 return 0; 2624 } 2625 2626 static void ggtt_unbind_vma(struct i915_vma *vma) 2627 { 2628 struct drm_device *dev = vma->vm->dev; 2629 struct drm_i915_private *dev_priv = dev->dev_private; 2630 struct drm_i915_gem_object *obj = vma->obj; 2631 const uint64_t size = min_t(uint64_t, 2632 obj->base.size, 2633 vma->node.size); 2634 2635 if (vma->bound & GLOBAL_BIND) { 2636 vma->vm->clear_range(vma->vm, 2637 vma->node.start, 2638 size, 2639 true); 2640 } 2641 2642 if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) { 2643 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 2644 2645 appgtt->base.clear_range(&appgtt->base, 2646 vma->node.start, 2647 size, 2648 true); 2649 } 2650 } 2651 2652 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 2653 { 2654 struct drm_device *dev = obj->base.dev; 2655 struct drm_i915_private *dev_priv = dev->dev_private; 2656 bool interruptible; 2657 2658 interruptible = do_idling(dev_priv); 2659 2660 dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents, 2661 PCI_DMA_BIDIRECTIONAL); 2662 2663 undo_idling(dev_priv, interruptible); 2664 } 2665 2666 static void i915_gtt_color_adjust(struct drm_mm_node *node, 2667 unsigned long color, 2668 u64 *start, 2669 u64 *end) 2670 { 2671 if (node->color != color) 2672 *start += 4096; 2673 2674 if (!list_empty(&node->node_list)) { 2675 node = list_entry(node->node_list.next, 2676 struct drm_mm_node, 2677 node_list); 2678 if (node->allocated && node->color != color) 2679 *end -= 4096; 2680 } 2681 } 2682 2683 static int i915_gem_setup_global_gtt(struct drm_device *dev, 2684 u64 start, 2685 u64 mappable_end, 2686 u64 end) 2687 { 2688 /* Let GEM Manage all of the aperture. 2689 * 2690 * However, leave one page at the end still bound to the scratch page. 2691 * There are a number of places where the hardware apparently prefetches 2692 * past the end of the object, and we've seen multiple hangs with the 2693 * GPU head pointer stuck in a batchbuffer bound at the last page of the 2694 * aperture. One page should be enough to keep any prefetching inside 2695 * of the aperture. 
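	 *
	 * Illustrative layout of what this function sets up:
	 *
	 *	start                          end - PAGE_SIZE           end
	 *	|---- drm_mm managed range ----|------- guard page -------|
	 *
	 * which is why total is temporarily shrunk by PAGE_SIZE around the
	 * i915_address_space_init() call below and the final page is cleared
	 * back to the scratch page at the end of this function.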
2696 */ 2697 struct drm_i915_private *dev_priv = dev->dev_private; 2698 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 2699 struct drm_mm_node *entry; 2700 struct drm_i915_gem_object *obj; 2701 unsigned long hole_start, hole_end; 2702 int ret; 2703 2704 BUG_ON(mappable_end > end); 2705 2706 ggtt_vm->start = start; 2707 2708 /* Subtract the guard page before address space initialization to 2709 * shrink the range used by drm_mm */ 2710 ggtt_vm->total = end - start - PAGE_SIZE; 2711 i915_address_space_init(ggtt_vm, dev_priv); 2712 ggtt_vm->total += PAGE_SIZE; 2713 2714 if (intel_vgpu_active(dev)) { 2715 ret = intel_vgt_balloon(dev); 2716 if (ret) 2717 return ret; 2718 } 2719 2720 if (!HAS_LLC(dev)) 2721 ggtt_vm->mm.color_adjust = i915_gtt_color_adjust; 2722 2723 /* Mark any preallocated objects as occupied */ 2724 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 2725 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 2726 2727 DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n", 2728 i915_gem_obj_ggtt_offset(obj), obj->base.size); 2729 2730 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 2731 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); 2732 if (ret) { 2733 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 2734 return ret; 2735 } 2736 vma->bound |= GLOBAL_BIND; 2737 __i915_vma_set_map_and_fenceable(vma); 2738 list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list); 2739 } 2740 2741 /* Clear any non-preallocated blocks */ 2742 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { 2743 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 2744 hole_start, hole_end); 2745 ggtt_vm->clear_range(ggtt_vm, hole_start, 2746 hole_end - hole_start, true); 2747 } 2748 2749 /* And finally clear the reserved guard page */ 2750 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); 2751 2752 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 2753 struct i915_hw_ppgtt *ppgtt; 2754 2755 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 2756 if (!ppgtt) 2757 return -ENOMEM; 2758 2759 ret = __hw_ppgtt_init(dev, ppgtt); 2760 if (ret) { 2761 ppgtt->base.cleanup(&ppgtt->base); 2762 kfree(ppgtt); 2763 return ret; 2764 } 2765 2766 if (ppgtt->base.allocate_va_range) 2767 ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, 2768 ppgtt->base.total); 2769 if (ret) { 2770 ppgtt->base.cleanup(&ppgtt->base); 2771 kfree(ppgtt); 2772 return ret; 2773 } 2774 2775 ppgtt->base.clear_range(&ppgtt->base, 2776 ppgtt->base.start, 2777 ppgtt->base.total, 2778 true); 2779 2780 dev_priv->mm.aliasing_ppgtt = ppgtt; 2781 WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma); 2782 dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma; 2783 } 2784 2785 return 0; 2786 } 2787 2788 void i915_gem_init_global_gtt(struct drm_device *dev) 2789 { 2790 struct drm_i915_private *dev_priv = dev->dev_private; 2791 u64 gtt_size, mappable_size; 2792 2793 gtt_size = dev_priv->gtt.base.total; 2794 mappable_size = dev_priv->gtt.mappable_end; 2795 2796 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); 2797 } 2798 2799 void i915_global_gtt_cleanup(struct drm_device *dev) 2800 { 2801 struct drm_i915_private *dev_priv = dev->dev_private; 2802 struct i915_address_space *vm = &dev_priv->gtt.base; 2803 2804 if (dev_priv->mm.aliasing_ppgtt) { 2805 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 2806 2807 ppgtt->base.cleanup(&ppgtt->base); 2808 } 2809 2810 if (drm_mm_initialized(&vm->mm)) { 2811 if (intel_vgpu_active(dev)) 2812 intel_vgt_deballoon(); 2813 2814 drm_mm_takedown(&vm->mm); 2815 
list_del(&vm->global_link); 2816 } 2817 2818 vm->cleanup(vm); 2819 } 2820 2821 static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 2822 { 2823 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 2824 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 2825 return snb_gmch_ctl << 20; 2826 } 2827 2828 static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 2829 { 2830 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 2831 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 2832 if (bdw_gmch_ctl) 2833 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 2834 2835 #ifdef CONFIG_X86_32 2836 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 2837 if (bdw_gmch_ctl > 4) 2838 bdw_gmch_ctl = 4; 2839 #endif 2840 2841 return bdw_gmch_ctl << 20; 2842 } 2843 2844 static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 2845 { 2846 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 2847 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 2848 2849 if (gmch_ctrl) 2850 return 1 << (20 + gmch_ctrl); 2851 2852 return 0; 2853 } 2854 2855 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 2856 { 2857 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 2858 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 2859 return snb_gmch_ctl << 25; /* 32 MB units */ 2860 } 2861 2862 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 2863 { 2864 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2865 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 2866 return bdw_gmch_ctl << 25; /* 32 MB units */ 2867 } 2868 2869 static size_t chv_get_stolen_size(u16 gmch_ctrl) 2870 { 2871 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 2872 gmch_ctrl &= SNB_GMCH_GMS_MASK; 2873 2874 /* 2875 * 0x0 to 0x10: 32MB increments starting at 0MB 2876 * 0x11 to 0x16: 4MB increments starting at 8MB 2877 * 0x17 to 0x1d: 4MB increments start at 36MB 2878 */ 2879 if (gmch_ctrl < 0x11) 2880 return gmch_ctrl << 25; 2881 else if (gmch_ctrl < 0x17) 2882 return (gmch_ctrl - 0x11 + 2) << 22; 2883 else 2884 return (gmch_ctrl - 0x17 + 9) << 22; 2885 } 2886 2887 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 2888 { 2889 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 2890 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 2891 2892 if (gen9_gmch_ctl < 0xf0) 2893 return gen9_gmch_ctl << 25; /* 32 MB units */ 2894 else 2895 /* 4MB increments starting at 0xf0 for 4MB */ 2896 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 2897 } 2898 2899 static int ggtt_probe_common(struct drm_device *dev, 2900 size_t gtt_size) 2901 { 2902 struct drm_i915_private *dev_priv = dev->dev_private; 2903 struct i915_page_scratch *scratch_page; 2904 phys_addr_t gtt_phys_addr; 2905 2906 /* For Modern GENs the PTEs and register space are split in the BAR */ 2907 gtt_phys_addr = pci_resource_start(dev->pdev, 0) + 2908 (pci_resource_len(dev->pdev, 0) / 2); 2909 2910 /* 2911 * On BXT writes larger than 64 bit to the GTT pagetable range will be 2912 * dropped. For WC mappings in general we have 64 byte burst writes 2913 * when the WC buffer is flushed, so we can't use it, but have to 2914 * resort to an uncached mapping. The WC issue is easily caught by the 2915 * readback check when writing GTT PTE entries. 
2916 */ 2917 if (IS_BROXTON(dev)) 2918 dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size); 2919 else 2920 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size); 2921 if (!dev_priv->gtt.gsm) { 2922 DRM_ERROR("Failed to map the gtt page table\n"); 2923 return -ENOMEM; 2924 } 2925 2926 scratch_page = alloc_scratch_page(dev); 2927 if (IS_ERR(scratch_page)) { 2928 DRM_ERROR("Scratch setup failed\n"); 2929 /* iounmap will also get called at remove, but meh */ 2930 iounmap(dev_priv->gtt.gsm); 2931 return PTR_ERR(scratch_page); 2932 } 2933 2934 dev_priv->gtt.base.scratch_page = scratch_page; 2935 2936 return 0; 2937 } 2938 2939 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 2940 * bits. When using advanced contexts each context stores its own PAT, but 2941 * writing this data shouldn't be harmful even in those cases. */ 2942 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 2943 { 2944 uint64_t pat; 2945 2946 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 2947 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 2948 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 2949 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 2950 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 2951 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 2952 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 2953 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 2954 2955 if (!USES_PPGTT(dev_priv->dev)) 2956 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry, 2957 * so RTL will always use the value corresponding to 2958 * pat_sel = 000". 2959 * So let's disable cache for GGTT to avoid screen corruptions. 2960 * MOCS still can be used though. 2961 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work 2962 * before this patch, i.e. the same uncached + snooping access 2963 * like on gen6/7 seems to be in effect. 2964 * - So this just fixes blitter/render access. Again it looks 2965 * like it's not just uncached access, but uncached + snooping. 2966 * So we can still hold onto all our assumptions wrt cpu 2967 * clflushing on LLC machines. 2968 */ 2969 pat = GEN8_PPAT(0, GEN8_PPAT_UC); 2970 2971 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 2972 * write would work. */ 2973 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 2974 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 2975 } 2976 2977 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 2978 { 2979 uint64_t pat; 2980 2981 /* 2982 * Map WB on BDW to snooped on CHV. 2983 * 2984 * Only the snoop bit has meaning for CHV, the rest is 2985 * ignored. 2986 * 2987 * The hardware will never snoop for certain types of accesses: 2988 * - CPU GTT (GMADR->GGTT->no snoop->memory) 2989 * - PPGTT page tables 2990 * - some other special cycles 2991 * 2992 * As with BDW, we also need to consider the following for GT accesses: 2993 * "For GGTT, there is NO pat_sel[2:0] from the entry, 2994 * so RTL will always use the value corresponding to 2995 * pat_sel = 000". 2996 * Which means we must set the snoop bit in PAT entry 0 2997 * in order to keep the global status page working. 
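	 *
	 * (Layout note, assuming the usual GEN8_PPAT(i, x) == (x) << (i * 8)
	 * definition: each index occupies its own byte of the 64-bit value,
	 * which is why it is written as the LO/HI register pair below.)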
2998 */ 2999 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 3000 GEN8_PPAT(1, 0) | 3001 GEN8_PPAT(2, 0) | 3002 GEN8_PPAT(3, 0) | 3003 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 3004 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 3005 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 3006 GEN8_PPAT(7, CHV_PPAT_SNOOP); 3007 3008 I915_WRITE(GEN8_PRIVATE_PAT_LO, pat); 3009 I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32); 3010 } 3011 3012 static int gen8_gmch_probe(struct drm_device *dev, 3013 u64 *gtt_total, 3014 size_t *stolen, 3015 phys_addr_t *mappable_base, 3016 u64 *mappable_end) 3017 { 3018 struct drm_i915_private *dev_priv = dev->dev_private; 3019 u64 gtt_size; 3020 u16 snb_gmch_ctl; 3021 int ret; 3022 3023 /* TODO: We're not aware of mappable constraints on gen8 yet */ 3024 *mappable_base = pci_resource_start(dev->pdev, 2); 3025 *mappable_end = pci_resource_len(dev->pdev, 2); 3026 3027 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 3028 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 3029 3030 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 3031 3032 if (INTEL_INFO(dev)->gen >= 9) { 3033 *stolen = gen9_get_stolen_size(snb_gmch_ctl); 3034 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 3035 } else if (IS_CHERRYVIEW(dev)) { 3036 *stolen = chv_get_stolen_size(snb_gmch_ctl); 3037 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl); 3038 } else { 3039 *stolen = gen8_get_stolen_size(snb_gmch_ctl); 3040 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 3041 } 3042 3043 *gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT; 3044 3045 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3046 chv_setup_private_ppat(dev_priv); 3047 else 3048 bdw_setup_private_ppat(dev_priv); 3049 3050 ret = ggtt_probe_common(dev, gtt_size); 3051 3052 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; 3053 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; 3054 dev_priv->gtt.base.bind_vma = ggtt_bind_vma; 3055 dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma; 3056 3057 if (IS_CHERRYVIEW(dev_priv)) 3058 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL; 3059 3060 return ret; 3061 } 3062 3063 static int gen6_gmch_probe(struct drm_device *dev, 3064 u64 *gtt_total, 3065 size_t *stolen, 3066 phys_addr_t *mappable_base, 3067 u64 *mappable_end) 3068 { 3069 struct drm_i915_private *dev_priv = dev->dev_private; 3070 unsigned int gtt_size; 3071 u16 snb_gmch_ctl; 3072 int ret; 3073 3074 *mappable_base = pci_resource_start(dev->pdev, 2); 3075 *mappable_end = pci_resource_len(dev->pdev, 2); 3076 3077 /* 64/512MB is the current min/max we actually know of, but this is just 3078 * a coarse sanity check. 
 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%llx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	iounmap(gtt->gsm);
	free_scratch_page(vm->dev, vm->scratch_page);
}

static int i915_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	gtt->base.dev = dev;

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	/* GMADR is the PCI mmio aperture into the global GTT.
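	 * On the gen6/gen8 probes above this is PCI BAR 2 (mappable_base and
	 * mappable_end), as opposed to the GSM half of BAR 0 that holds the
	 * PTEs themselves; see ggtt_probe_common().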
*/ 3183 DRM_INFO("Memory usable by graphics device = %lluM\n", 3184 gtt->base.total >> 20); 3185 DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20); 3186 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20); 3187 #ifdef CONFIG_INTEL_IOMMU 3188 if (intel_iommu_gfx_mapped) 3189 DRM_INFO("VT-d active for gfx access\n"); 3190 #endif 3191 /* 3192 * i915.enable_ppgtt is read-only, so do an early pass to validate the 3193 * user's requested state against the hardware/driver capabilities. We 3194 * do this now so that we can print out any log messages once rather 3195 * than every time we check intel_enable_ppgtt(). 3196 */ 3197 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt); 3198 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); 3199 3200 return 0; 3201 } 3202 3203 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 3204 { 3205 struct drm_i915_private *dev_priv = dev->dev_private; 3206 struct drm_i915_gem_object *obj; 3207 struct i915_address_space *vm; 3208 struct i915_vma *vma; 3209 bool flush; 3210 3211 i915_check_and_clear_faults(dev); 3212 3213 /* First fill our portion of the GTT with scratch pages */ 3214 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 3215 dev_priv->gtt.base.start, 3216 dev_priv->gtt.base.total, 3217 true); 3218 3219 /* Cache flush objects bound into GGTT and rebind them. */ 3220 vm = &dev_priv->gtt.base; 3221 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 3222 flush = false; 3223 list_for_each_entry(vma, &obj->vma_list, vma_link) { 3224 if (vma->vm != vm) 3225 continue; 3226 3227 WARN_ON(i915_vma_bind(vma, obj->cache_level, 3228 PIN_UPDATE)); 3229 3230 flush = true; 3231 } 3232 3233 if (flush) 3234 i915_gem_clflush_object(obj, obj->pin_display); 3235 } 3236 3237 if (INTEL_INFO(dev)->gen >= 8) { 3238 if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) 3239 chv_setup_private_ppat(dev_priv); 3240 else 3241 bdw_setup_private_ppat(dev_priv); 3242 3243 return; 3244 } 3245 3246 if (USES_PPGTT(dev)) { 3247 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 3248 /* TODO: Perhaps it shouldn't be gen6 specific */ 3249 3250 struct i915_hw_ppgtt *ppgtt = 3251 container_of(vm, struct i915_hw_ppgtt, 3252 base); 3253 3254 if (i915_is_ggtt(vm)) 3255 ppgtt = dev_priv->mm.aliasing_ppgtt; 3256 3257 gen6_write_page_range(dev_priv, &ppgtt->pd, 3258 0, ppgtt->base.total); 3259 } 3260 } 3261 3262 i915_ggtt_flush(dev_priv); 3263 } 3264 3265 static struct i915_vma * 3266 __i915_gem_vma_create(struct drm_i915_gem_object *obj, 3267 struct i915_address_space *vm, 3268 const struct i915_ggtt_view *ggtt_view) 3269 { 3270 struct i915_vma *vma; 3271 3272 if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view)) 3273 return ERR_PTR(-EINVAL); 3274 3275 vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); 3276 if (vma == NULL) 3277 return ERR_PTR(-ENOMEM); 3278 3279 INIT_LIST_HEAD(&vma->vma_link); 3280 INIT_LIST_HEAD(&vma->mm_list); 3281 INIT_LIST_HEAD(&vma->exec_list); 3282 vma->vm = vm; 3283 vma->obj = obj; 3284 3285 if (i915_is_ggtt(vm)) 3286 vma->ggtt_view = *ggtt_view; 3287 3288 list_add_tail(&vma->vma_link, &obj->vma_list); 3289 if (!i915_is_ggtt(vm)) 3290 i915_ppgtt_get(i915_vm_to_ppgtt(vm)); 3291 3292 return vma; 3293 } 3294 3295 struct i915_vma * 3296 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 3297 struct i915_address_space *vm) 3298 { 3299 struct i915_vma *vma; 3300 3301 vma = i915_gem_obj_to_vma(obj, vm); 3302 if (!vma) 3303 vma = __i915_gem_vma_create(obj, vm, 3304 i915_is_ggtt(vm) ? 
&i915_ggtt_view_normal : NULL); 3305 3306 return vma; 3307 } 3308 3309 struct i915_vma * 3310 i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, 3311 const struct i915_ggtt_view *view) 3312 { 3313 struct i915_address_space *ggtt = i915_obj_to_ggtt(obj); 3314 struct i915_vma *vma; 3315 3316 if (WARN_ON(!view)) 3317 return ERR_PTR(-EINVAL); 3318 3319 vma = i915_gem_obj_to_ggtt_view(obj, view); 3320 3321 if (IS_ERR(vma)) 3322 return vma; 3323 3324 if (!vma) 3325 vma = __i915_gem_vma_create(obj, ggtt, view); 3326 3327 return vma; 3328 3329 } 3330 3331 static struct scatterlist * 3332 rotate_pages(dma_addr_t *in, unsigned int offset, 3333 unsigned int width, unsigned int height, 3334 struct sg_table *st, struct scatterlist *sg) 3335 { 3336 unsigned int column, row; 3337 unsigned int src_idx; 3338 3339 if (!sg) { 3340 st->nents = 0; 3341 sg = st->sgl; 3342 } 3343 3344 for (column = 0; column < width; column++) { 3345 src_idx = width * (height - 1) + column; 3346 for (row = 0; row < height; row++) { 3347 st->nents++; 3348 /* We don't need the pages, but need to initialize 3349 * the entries so the sg list can be happily traversed. 3350 * The only thing we need are DMA addresses. 3351 */ 3352 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3353 sg_dma_address(sg) = in[offset + src_idx]; 3354 sg_dma_len(sg) = PAGE_SIZE; 3355 sg = sg_next(sg); 3356 src_idx -= width; 3357 } 3358 } 3359 3360 return sg; 3361 } 3362 3363 static struct sg_table * 3364 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view, 3365 struct drm_i915_gem_object *obj) 3366 { 3367 struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info; 3368 unsigned int size_pages = rot_info->size >> PAGE_SHIFT; 3369 unsigned int size_pages_uv; 3370 struct sg_page_iter sg_iter; 3371 unsigned long i; 3372 dma_addr_t *page_addr_list; 3373 struct sg_table *st; 3374 unsigned int uv_start_page; 3375 struct scatterlist *sg; 3376 int ret = -ENOMEM; 3377 3378 /* Allocate a temporary list of source pages for random access. */ 3379 page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE, 3380 sizeof(dma_addr_t)); 3381 if (!page_addr_list) 3382 return ERR_PTR(ret); 3383 3384 /* Account for UV plane with NV12. */ 3385 if (rot_info->pixel_format == DRM_FORMAT_NV12) 3386 size_pages_uv = rot_info->size_uv >> PAGE_SHIFT; 3387 else 3388 size_pages_uv = 0; 3389 3390 /* Allocate target SG list. */ 3391 st = kmalloc(sizeof(*st), GFP_KERNEL); 3392 if (!st) 3393 goto err_st_alloc; 3394 3395 ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); 3396 if (ret) 3397 goto err_sg_alloc; 3398 3399 /* Populate source page list from the object. */ 3400 i = 0; 3401 for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) { 3402 page_addr_list[i] = sg_page_iter_dma_address(&sg_iter); 3403 i++; 3404 } 3405 3406 /* Rotate the pages. */ 3407 sg = rotate_pages(page_addr_list, 0, 3408 rot_info->width_pages, rot_info->height_pages, 3409 st, NULL); 3410 3411 /* Append the UV plane if NV12. */ 3412 if (rot_info->pixel_format == DRM_FORMAT_NV12) { 3413 uv_start_page = size_pages; 3414 3415 /* Check for tile-row un-alignment. 
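		 * If uv_offset is not page aligned, the UV data shares its
		 * first page with the tail of the Y plane, so the UV mapping
		 * has to start one page earlier.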
*/ 3416 if (offset_in_page(rot_info->uv_offset)) 3417 uv_start_page--; 3418 3419 rot_info->uv_start_page = uv_start_page; 3420 3421 rotate_pages(page_addr_list, uv_start_page, 3422 rot_info->width_pages_uv, 3423 rot_info->height_pages_uv, 3424 st, sg); 3425 } 3426 3427 DRM_DEBUG_KMS( 3428 "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n", 3429 obj->base.size, rot_info->pitch, rot_info->height, 3430 rot_info->pixel_format, rot_info->width_pages, 3431 rot_info->height_pages, size_pages + size_pages_uv, 3432 size_pages); 3433 3434 drm_free_large(page_addr_list); 3435 3436 return st; 3437 3438 err_sg_alloc: 3439 kfree(st); 3440 err_st_alloc: 3441 drm_free_large(page_addr_list); 3442 3443 DRM_DEBUG_KMS( 3444 "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n", 3445 obj->base.size, ret, rot_info->pitch, rot_info->height, 3446 rot_info->pixel_format, rot_info->width_pages, 3447 rot_info->height_pages, size_pages + size_pages_uv, 3448 size_pages); 3449 return ERR_PTR(ret); 3450 } 3451 3452 static struct sg_table * 3453 intel_partial_pages(const struct i915_ggtt_view *view, 3454 struct drm_i915_gem_object *obj) 3455 { 3456 struct sg_table *st; 3457 struct scatterlist *sg; 3458 struct sg_page_iter obj_sg_iter; 3459 int ret = -ENOMEM; 3460 3461 st = kmalloc(sizeof(*st), GFP_KERNEL); 3462 if (!st) 3463 goto err_st_alloc; 3464 3465 ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); 3466 if (ret) 3467 goto err_sg_alloc; 3468 3469 sg = st->sgl; 3470 st->nents = 0; 3471 for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, 3472 view->params.partial.offset) 3473 { 3474 if (st->nents >= view->params.partial.size) 3475 break; 3476 3477 sg_set_page(sg, NULL, PAGE_SIZE, 0); 3478 sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); 3479 sg_dma_len(sg) = PAGE_SIZE; 3480 3481 sg = sg_next(sg); 3482 st->nents++; 3483 } 3484 3485 return st; 3486 3487 err_sg_alloc: 3488 kfree(st); 3489 err_st_alloc: 3490 return ERR_PTR(ret); 3491 } 3492 3493 static int 3494 i915_get_ggtt_vma_pages(struct i915_vma *vma) 3495 { 3496 int ret = 0; 3497 3498 if (vma->ggtt_view.pages) 3499 return 0; 3500 3501 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) 3502 vma->ggtt_view.pages = vma->obj->pages; 3503 else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) 3504 vma->ggtt_view.pages = 3505 intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj); 3506 else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) 3507 vma->ggtt_view.pages = 3508 intel_partial_pages(&vma->ggtt_view, vma->obj); 3509 else 3510 WARN_ONCE(1, "GGTT view %u not implemented!\n", 3511 vma->ggtt_view.type); 3512 3513 if (!vma->ggtt_view.pages) { 3514 DRM_ERROR("Failed to get pages for GGTT view type %u!\n", 3515 vma->ggtt_view.type); 3516 ret = -EINVAL; 3517 } else if (IS_ERR(vma->ggtt_view.pages)) { 3518 ret = PTR_ERR(vma->ggtt_view.pages); 3519 vma->ggtt_view.pages = NULL; 3520 DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", 3521 vma->ggtt_view.type, ret); 3522 } 3523 3524 return ret; 3525 } 3526 3527 /** 3528 * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. 
3529 * @vma: VMA to map 3530 * @cache_level: mapping cache level 3531 * @flags: flags like global or local mapping 3532 * 3533 * DMA addresses are taken from the scatter-gather table of this object (or of 3534 * this VMA in case of non-default GGTT views) and PTE entries set up. 3535 * Note that DMA addresses are also the only part of the SG table we care about. 3536 */ 3537 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, 3538 u32 flags) 3539 { 3540 int ret; 3541 u32 bind_flags; 3542 3543 if (WARN_ON(flags == 0)) 3544 return -EINVAL; 3545 3546 bind_flags = 0; 3547 if (flags & PIN_GLOBAL) 3548 bind_flags |= GLOBAL_BIND; 3549 if (flags & PIN_USER) 3550 bind_flags |= LOCAL_BIND; 3551 3552 if (flags & PIN_UPDATE) 3553 bind_flags |= vma->bound; 3554 else 3555 bind_flags &= ~vma->bound; 3556 3557 if (bind_flags == 0) 3558 return 0; 3559 3560 if (vma->bound == 0 && vma->vm->allocate_va_range) { 3561 trace_i915_va_alloc(vma->vm, 3562 vma->node.start, 3563 vma->node.size, 3564 VM_TO_TRACE_NAME(vma->vm)); 3565 3566 /* XXX: i915_vma_pin() will fix this +- hack */ 3567 vma->pin_count++; 3568 ret = vma->vm->allocate_va_range(vma->vm, 3569 vma->node.start, 3570 vma->node.size); 3571 vma->pin_count--; 3572 if (ret) 3573 return ret; 3574 } 3575 3576 ret = vma->vm->bind_vma(vma, cache_level, bind_flags); 3577 if (ret) 3578 return ret; 3579 3580 vma->bound |= bind_flags; 3581 3582 return 0; 3583 } 3584 3585 /** 3586 * i915_ggtt_view_size - Get the size of a GGTT view. 3587 * @obj: Object the view is of. 3588 * @view: The view in question. 3589 * 3590 * @return The size of the GGTT view in bytes. 3591 */ 3592 size_t 3593 i915_ggtt_view_size(struct drm_i915_gem_object *obj, 3594 const struct i915_ggtt_view *view) 3595 { 3596 if (view->type == I915_GGTT_VIEW_NORMAL) { 3597 return obj->base.size; 3598 } else if (view->type == I915_GGTT_VIEW_ROTATED) { 3599 return view->params.rotation_info.size; 3600 } else if (view->type == I915_GGTT_VIEW_PARTIAL) { 3601 return view->params.partial.size << PAGE_SHIFT; 3602 } else { 3603 WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type); 3604 return obj->base.size; 3605 } 3606 } 3607
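
/*
 * Usage sketch (illustrative only, assuming the caller already holds the
 * usual locks and has the rotation parameters at hand; this is not part of
 * the driver): display code wanting the rotated view of a framebuffer
 * object ends up, directly or via the higher level pin helpers, doing
 * roughly
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	view.params.rotation_info = <filled in from the framebuffer>;
 *
 *	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, &view);
 *	ret = i915_vma_bind(vma, obj->cache_level, PIN_GLOBAL);
 *
 * The first bind reaches i915_get_ggtt_vma_pages(), which builds the rotated
 * sg table via intel_rotate_fb_obj_pages() and caches it in
 * vma->ggtt_view.pages, while i915_ggtt_view_size() reports how much GGTT
 * address space such a view needs.
 */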