/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically, objects could exist (be bound) in global GTT space only as
 * singular instances, with a view representing all of the object's backing
 * pages in a linear fashion. This view is called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * The above would represent a normal GGTT view as normally mapped for GPU or
 * CPU rendering. In contrast, fed to the display engine would be an
 * alternative view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT-bound objects was
 * added with the _view suffix. They take the struct i915_ggtt_view parameter
 * encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, i.e. the ones not taking the view parameter,
 * operate on, or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that the
 * passed-in struct i915_ggtt_view does not need to be persistent (i.e. left
 * around after calling the core API functions).
 *
 */

const struct i915_ggtt_view i915_ggtt_view_normal;

static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
	    dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
		return 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}


static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ?
GEN6_PTE_VALID : 0; 209 pte |= GEN6_PTE_ADDR_ENCODE(addr); 210 211 switch (level) { 212 case I915_CACHE_L3_LLC: 213 pte |= GEN7_PTE_CACHE_L3_LLC; 214 break; 215 case I915_CACHE_LLC: 216 pte |= GEN6_PTE_CACHE_LLC; 217 break; 218 case I915_CACHE_NONE: 219 pte |= GEN6_PTE_UNCACHED; 220 break; 221 default: 222 MISSING_CASE(level); 223 } 224 225 return pte; 226 } 227 228 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, 229 enum i915_cache_level level, 230 bool valid, u32 flags) 231 { 232 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 233 pte |= GEN6_PTE_ADDR_ENCODE(addr); 234 235 if (!(flags & PTE_READ_ONLY)) 236 pte |= BYT_PTE_WRITEABLE; 237 238 if (level != I915_CACHE_NONE) 239 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 240 241 return pte; 242 } 243 244 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, 245 enum i915_cache_level level, 246 bool valid, u32 unused) 247 { 248 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 249 pte |= HSW_PTE_ADDR_ENCODE(addr); 250 251 if (level != I915_CACHE_NONE) 252 pte |= HSW_WB_LLC_AGE3; 253 254 return pte; 255 } 256 257 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, 258 enum i915_cache_level level, 259 bool valid, u32 unused) 260 { 261 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 262 pte |= HSW_PTE_ADDR_ENCODE(addr); 263 264 switch (level) { 265 case I915_CACHE_NONE: 266 break; 267 case I915_CACHE_WT: 268 pte |= HSW_WT_ELLC_LLC_AGE3; 269 break; 270 default: 271 pte |= HSW_WB_ELLC_LLC_AGE3; 272 break; 273 } 274 275 return pte; 276 } 277 278 /* Broadwell Page Directory Pointer Descriptors */ 279 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, 280 uint64_t val) 281 { 282 int ret; 283 284 BUG_ON(entry >= 4); 285 286 ret = intel_ring_begin(ring, 6); 287 if (ret) 288 return ret; 289 290 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 291 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry)); 292 intel_ring_emit(ring, (u32)(val >> 32)); 293 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 294 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry)); 295 intel_ring_emit(ring, (u32)(val)); 296 intel_ring_advance(ring); 297 298 return 0; 299 } 300 301 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, 302 struct intel_engine_cs *ring) 303 { 304 int i, ret; 305 306 /* bit of a hack to find the actual last used pd */ 307 int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; 308 309 for (i = used_pd - 1; i >= 0; i--) { 310 dma_addr_t addr = ppgtt->pd_dma_addr[i]; 311 ret = gen8_write_pdp(ring, i, addr); 312 if (ret) 313 return ret; 314 } 315 316 return 0; 317 } 318 319 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 320 uint64_t start, 321 uint64_t length, 322 bool use_scratch) 323 { 324 struct i915_hw_ppgtt *ppgtt = 325 container_of(vm, struct i915_hw_ppgtt, base); 326 gen8_gtt_pte_t *pt_vaddr, scratch_pte; 327 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 328 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 329 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 330 unsigned num_entries = length >> PAGE_SHIFT; 331 unsigned last_pte, i; 332 333 scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, 334 I915_CACHE_LLC, use_scratch); 335 336 while (num_entries) { 337 struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde]; 338 339 last_pte = pte + num_entries; 340 if (last_pte > GEN8_PTES_PER_PAGE) 341 last_pte = GEN8_PTES_PER_PAGE; 342 343 pt_vaddr = kmap_atomic(page_table); 344 345 for (i = pte; i < last_pte; i++) { 346 pt_vaddr[i] = scratch_pte; 347 num_entries--; 348 } 349 350 
if (!HAS_LLC(ppgtt->base.dev)) 351 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 352 kunmap_atomic(pt_vaddr); 353 354 pte = 0; 355 if (++pde == GEN8_PDES_PER_PAGE) { 356 pdpe++; 357 pde = 0; 358 } 359 } 360 } 361 362 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 363 struct sg_table *pages, 364 uint64_t start, 365 enum i915_cache_level cache_level, u32 unused) 366 { 367 struct i915_hw_ppgtt *ppgtt = 368 container_of(vm, struct i915_hw_ppgtt, base); 369 gen8_gtt_pte_t *pt_vaddr; 370 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 371 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 372 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 373 struct sg_page_iter sg_iter; 374 375 pt_vaddr = NULL; 376 377 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 378 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS)) 379 break; 380 381 if (pt_vaddr == NULL) 382 pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]); 383 384 pt_vaddr[pte] = 385 gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), 386 cache_level, true); 387 if (++pte == GEN8_PTES_PER_PAGE) { 388 if (!HAS_LLC(ppgtt->base.dev)) 389 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 390 kunmap_atomic(pt_vaddr); 391 pt_vaddr = NULL; 392 if (++pde == GEN8_PDES_PER_PAGE) { 393 pdpe++; 394 pde = 0; 395 } 396 pte = 0; 397 } 398 } 399 if (pt_vaddr) { 400 if (!HAS_LLC(ppgtt->base.dev)) 401 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 402 kunmap_atomic(pt_vaddr); 403 } 404 } 405 406 static void gen8_free_page_tables(struct page **pt_pages) 407 { 408 int i; 409 410 if (pt_pages == NULL) 411 return; 412 413 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) 414 if (pt_pages[i]) 415 __free_pages(pt_pages[i], 0); 416 } 417 418 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt) 419 { 420 int i; 421 422 for (i = 0; i < ppgtt->num_pd_pages; i++) { 423 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]); 424 kfree(ppgtt->gen8_pt_pages[i]); 425 kfree(ppgtt->gen8_pt_dma_addr[i]); 426 } 427 428 __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT)); 429 } 430 431 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 432 { 433 struct pci_dev *hwdev = ppgtt->base.dev->pdev; 434 int i, j; 435 436 for (i = 0; i < ppgtt->num_pd_pages; i++) { 437 /* TODO: In the future we'll support sparse mappings, so this 438 * will have to change. 
*/ 439 if (!ppgtt->pd_dma_addr[i]) 440 continue; 441 442 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, 443 PCI_DMA_BIDIRECTIONAL); 444 445 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 446 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 447 if (addr) 448 pci_unmap_page(hwdev, addr, PAGE_SIZE, 449 PCI_DMA_BIDIRECTIONAL); 450 } 451 } 452 } 453 454 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 455 { 456 struct i915_hw_ppgtt *ppgtt = 457 container_of(vm, struct i915_hw_ppgtt, base); 458 459 gen8_ppgtt_unmap_pages(ppgtt); 460 gen8_ppgtt_free(ppgtt); 461 } 462 463 static struct page **__gen8_alloc_page_tables(void) 464 { 465 struct page **pt_pages; 466 int i; 467 468 pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL); 469 if (!pt_pages) 470 return ERR_PTR(-ENOMEM); 471 472 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { 473 pt_pages[i] = alloc_page(GFP_KERNEL); 474 if (!pt_pages[i]) 475 goto bail; 476 } 477 478 return pt_pages; 479 480 bail: 481 gen8_free_page_tables(pt_pages); 482 kfree(pt_pages); 483 return ERR_PTR(-ENOMEM); 484 } 485 486 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, 487 const int max_pdp) 488 { 489 struct page **pt_pages[GEN8_LEGACY_PDPS]; 490 int i, ret; 491 492 for (i = 0; i < max_pdp; i++) { 493 pt_pages[i] = __gen8_alloc_page_tables(); 494 if (IS_ERR(pt_pages[i])) { 495 ret = PTR_ERR(pt_pages[i]); 496 goto unwind_out; 497 } 498 } 499 500 /* NB: Avoid touching gen8_pt_pages until last to keep the allocation, 501 * "atomic" - for cleanup purposes. 502 */ 503 for (i = 0; i < max_pdp; i++) 504 ppgtt->gen8_pt_pages[i] = pt_pages[i]; 505 506 return 0; 507 508 unwind_out: 509 while (i--) { 510 gen8_free_page_tables(pt_pages[i]); 511 kfree(pt_pages[i]); 512 } 513 514 return ret; 515 } 516 517 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) 518 { 519 int i; 520 521 for (i = 0; i < ppgtt->num_pd_pages; i++) { 522 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, 523 sizeof(dma_addr_t), 524 GFP_KERNEL); 525 if (!ppgtt->gen8_pt_dma_addr[i]) 526 return -ENOMEM; 527 } 528 529 return 0; 530 } 531 532 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, 533 const int max_pdp) 534 { 535 ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); 536 if (!ppgtt->pd_pages) 537 return -ENOMEM; 538 539 ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); 540 BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); 541 542 return 0; 543 } 544 545 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, 546 const int max_pdp) 547 { 548 int ret; 549 550 ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp); 551 if (ret) 552 return ret; 553 554 ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp); 555 if (ret) { 556 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); 557 return ret; 558 } 559 560 ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; 561 562 ret = gen8_ppgtt_allocate_dma(ppgtt); 563 if (ret) 564 gen8_ppgtt_free(ppgtt); 565 566 return ret; 567 } 568 569 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, 570 const int pd) 571 { 572 dma_addr_t pd_addr; 573 int ret; 574 575 pd_addr = pci_map_page(ppgtt->base.dev->pdev, 576 &ppgtt->pd_pages[pd], 0, 577 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 578 579 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); 580 if (ret) 581 return ret; 582 583 ppgtt->pd_dma_addr[pd] = pd_addr; 584 585 return 0; 586 } 587 588 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt 
						 *ppgtt,
						 const int pd,
						 const int pt)
{
	dma_addr_t pt_addr;
	struct page *p;
	int ret;

	p = ppgtt->gen8_pt_pages[pd][pt];
	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
	if (ret)
		return ret;

	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;

	return 0;
}

/**
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory (4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space).
 *
 * FIXME: split allocation into smaller pieces. For now we only ever do this
 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 * TODO: Do something with the size parameter
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
	int i, j, ret;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

	/* 1. Do all our allocations for page directories and page tables. */
	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
	if (ret)
		return ret;

	/*
	 * 2. Create DMA mappings for the page directories and page tables.
	 */
	for (i = 0; i < max_pdp; i++) {
		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
		if (ret)
			goto bail;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
			if (ret)
				goto bail;
		}
	}

	/*
	 * 3. Map all the page directory entries to point to the page tables
	 * we've allocated.
	 *
	 * For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again.
654 */ 655 for (i = 0; i < max_pdp; i++) { 656 gen8_ppgtt_pde_t *pd_vaddr; 657 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); 658 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 659 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 660 pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, 661 I915_CACHE_LLC); 662 } 663 if (!HAS_LLC(ppgtt->base.dev)) 664 drm_clflush_virt_range(pd_vaddr, PAGE_SIZE); 665 kunmap_atomic(pd_vaddr); 666 } 667 668 ppgtt->switch_mm = gen8_mm_switch; 669 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 670 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 671 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 672 ppgtt->base.start = 0; 673 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE; 674 675 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 676 677 DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", 678 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); 679 DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", 680 ppgtt->num_pd_entries, 681 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30)); 682 return 0; 683 684 bail: 685 gen8_ppgtt_unmap_pages(ppgtt); 686 gen8_ppgtt_free(ppgtt); 687 return ret; 688 } 689 690 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 691 { 692 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 693 struct i915_address_space *vm = &ppgtt->base; 694 gen6_gtt_pte_t __iomem *pd_addr; 695 gen6_gtt_pte_t scratch_pte; 696 uint32_t pd_entry; 697 int pte, pde; 698 699 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); 700 701 pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + 702 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 703 704 seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm, 705 ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries); 706 for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { 707 u32 expected; 708 gen6_gtt_pte_t *pt_vaddr; 709 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde]; 710 pd_entry = readl(pd_addr + pde); 711 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 712 713 if (pd_entry != expected) 714 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 715 pde, 716 pd_entry, 717 expected); 718 seq_printf(m, "\tPDE: %x\n", pd_entry); 719 720 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); 721 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { 722 unsigned long va = 723 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + 724 (pte * PAGE_SIZE); 725 int i; 726 bool found = false; 727 for (i = 0; i < 4; i++) 728 if (pt_vaddr[pte + i] != scratch_pte) 729 found = true; 730 if (!found) 731 continue; 732 733 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 734 for (i = 0; i < 4; i++) { 735 if (pt_vaddr[pte + i] != scratch_pte) 736 seq_printf(m, " %08x", pt_vaddr[pte + i]); 737 else 738 seq_puts(m, " SCRATCH "); 739 } 740 seq_puts(m, "\n"); 741 } 742 kunmap_atomic(pt_vaddr); 743 } 744 } 745 746 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) 747 { 748 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 749 gen6_gtt_pte_t __iomem *pd_addr; 750 uint32_t pd_entry; 751 int i; 752 753 WARN_ON(ppgtt->pd_offset & 0x3f); 754 pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm + 755 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 756 for (i = 0; i < ppgtt->num_pd_entries; i++) { 757 dma_addr_t pt_addr; 758 759 pt_addr = ppgtt->pt_dma_addr[i]; 760 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 761 pd_entry |= GEN6_PDE_VALID; 762 763 writel(pd_entry, pd_addr + 
i); 764 } 765 readl(pd_addr); 766 } 767 768 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 769 { 770 BUG_ON(ppgtt->pd_offset & 0x3f); 771 772 return (ppgtt->pd_offset / 64) << 16; 773 } 774 775 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 776 struct intel_engine_cs *ring) 777 { 778 int ret; 779 780 /* NB: TLBs must be flushed and invalidated before a switch */ 781 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 782 if (ret) 783 return ret; 784 785 ret = intel_ring_begin(ring, 6); 786 if (ret) 787 return ret; 788 789 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 790 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 791 intel_ring_emit(ring, PP_DIR_DCLV_2G); 792 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 793 intel_ring_emit(ring, get_pd_offset(ppgtt)); 794 intel_ring_emit(ring, MI_NOOP); 795 intel_ring_advance(ring); 796 797 return 0; 798 } 799 800 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 801 struct intel_engine_cs *ring) 802 { 803 int ret; 804 805 /* NB: TLBs must be flushed and invalidated before a switch */ 806 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 807 if (ret) 808 return ret; 809 810 ret = intel_ring_begin(ring, 6); 811 if (ret) 812 return ret; 813 814 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 815 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 816 intel_ring_emit(ring, PP_DIR_DCLV_2G); 817 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 818 intel_ring_emit(ring, get_pd_offset(ppgtt)); 819 intel_ring_emit(ring, MI_NOOP); 820 intel_ring_advance(ring); 821 822 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 823 if (ring->id != RCS) { 824 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 825 if (ret) 826 return ret; 827 } 828 829 return 0; 830 } 831 832 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 833 struct intel_engine_cs *ring) 834 { 835 struct drm_device *dev = ppgtt->base.dev; 836 struct drm_i915_private *dev_priv = dev->dev_private; 837 838 839 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 840 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 841 842 POSTING_READ(RING_PP_DIR_DCLV(ring)); 843 844 return 0; 845 } 846 847 static void gen8_ppgtt_enable(struct drm_device *dev) 848 { 849 struct drm_i915_private *dev_priv = dev->dev_private; 850 struct intel_engine_cs *ring; 851 int j; 852 853 for_each_ring(ring, dev_priv, j) { 854 I915_WRITE(RING_MODE_GEN7(ring), 855 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 856 } 857 } 858 859 static void gen7_ppgtt_enable(struct drm_device *dev) 860 { 861 struct drm_i915_private *dev_priv = dev->dev_private; 862 struct intel_engine_cs *ring; 863 uint32_t ecochk, ecobits; 864 int i; 865 866 ecobits = I915_READ(GAC_ECO_BITS); 867 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 868 869 ecochk = I915_READ(GAM_ECOCHK); 870 if (IS_HASWELL(dev)) { 871 ecochk |= ECOCHK_PPGTT_WB_HSW; 872 } else { 873 ecochk |= ECOCHK_PPGTT_LLC_IVB; 874 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 875 } 876 I915_WRITE(GAM_ECOCHK, ecochk); 877 878 for_each_ring(ring, dev_priv, i) { 879 /* GFX_MODE is per-ring on gen7+ */ 880 I915_WRITE(RING_MODE_GEN7(ring), 881 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 882 } 883 } 884 885 static void gen6_ppgtt_enable(struct drm_device *dev) 886 { 887 struct drm_i915_private *dev_priv = dev->dev_private; 888 uint32_t ecochk, gab_ctl, ecobits; 889 890 ecobits = I915_READ(GAC_ECO_BITS); 891 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 892 ECOBITS_PPGTT_CACHE64B); 893 894 gab_ctl = I915_READ(GAB_CTL); 
895 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 896 897 ecochk = I915_READ(GAM_ECOCHK); 898 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 899 900 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 901 } 902 903 /* PPGTT support for Sandybdrige/Gen6 and later */ 904 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 905 uint64_t start, 906 uint64_t length, 907 bool use_scratch) 908 { 909 struct i915_hw_ppgtt *ppgtt = 910 container_of(vm, struct i915_hw_ppgtt, base); 911 gen6_gtt_pte_t *pt_vaddr, scratch_pte; 912 unsigned first_entry = start >> PAGE_SHIFT; 913 unsigned num_entries = length >> PAGE_SHIFT; 914 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 915 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; 916 unsigned last_pte, i; 917 918 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); 919 920 while (num_entries) { 921 last_pte = first_pte + num_entries; 922 if (last_pte > I915_PPGTT_PT_ENTRIES) 923 last_pte = I915_PPGTT_PT_ENTRIES; 924 925 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 926 927 for (i = first_pte; i < last_pte; i++) 928 pt_vaddr[i] = scratch_pte; 929 930 kunmap_atomic(pt_vaddr); 931 932 num_entries -= last_pte - first_pte; 933 first_pte = 0; 934 act_pt++; 935 } 936 } 937 938 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 939 struct sg_table *pages, 940 uint64_t start, 941 enum i915_cache_level cache_level, u32 flags) 942 { 943 struct i915_hw_ppgtt *ppgtt = 944 container_of(vm, struct i915_hw_ppgtt, base); 945 gen6_gtt_pte_t *pt_vaddr; 946 unsigned first_entry = start >> PAGE_SHIFT; 947 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 948 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; 949 struct sg_page_iter sg_iter; 950 951 pt_vaddr = NULL; 952 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 953 if (pt_vaddr == NULL) 954 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 955 956 pt_vaddr[act_pte] = 957 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 958 cache_level, true, flags); 959 960 if (++act_pte == I915_PPGTT_PT_ENTRIES) { 961 kunmap_atomic(pt_vaddr); 962 pt_vaddr = NULL; 963 act_pt++; 964 act_pte = 0; 965 } 966 } 967 if (pt_vaddr) 968 kunmap_atomic(pt_vaddr); 969 } 970 971 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 972 { 973 int i; 974 975 if (ppgtt->pt_dma_addr) { 976 for (i = 0; i < ppgtt->num_pd_entries; i++) 977 pci_unmap_page(ppgtt->base.dev->pdev, 978 ppgtt->pt_dma_addr[i], 979 4096, PCI_DMA_BIDIRECTIONAL); 980 } 981 } 982 983 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) 984 { 985 int i; 986 987 kfree(ppgtt->pt_dma_addr); 988 for (i = 0; i < ppgtt->num_pd_entries; i++) 989 __free_page(ppgtt->pt_pages[i]); 990 kfree(ppgtt->pt_pages); 991 } 992 993 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 994 { 995 struct i915_hw_ppgtt *ppgtt = 996 container_of(vm, struct i915_hw_ppgtt, base); 997 998 drm_mm_remove_node(&ppgtt->node); 999 1000 gen6_ppgtt_unmap_pages(ppgtt); 1001 gen6_ppgtt_free(ppgtt); 1002 } 1003 1004 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1005 { 1006 struct drm_device *dev = ppgtt->base.dev; 1007 struct drm_i915_private *dev_priv = dev->dev_private; 1008 bool retried = false; 1009 int ret; 1010 1011 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1012 * allocator works in address space sizes, so it's multiplied by page 1013 * size. We allocate at the top of the GTT to avoid fragmentation. 
1014 */ 1015 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); 1016 alloc: 1017 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, 1018 &ppgtt->node, GEN6_PD_SIZE, 1019 GEN6_PD_ALIGN, 0, 1020 0, dev_priv->gtt.base.total, 1021 DRM_MM_TOPDOWN); 1022 if (ret == -ENOSPC && !retried) { 1023 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 1024 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1025 I915_CACHE_NONE, 1026 0, dev_priv->gtt.base.total, 1027 0); 1028 if (ret) 1029 return ret; 1030 1031 retried = true; 1032 goto alloc; 1033 } 1034 1035 if (ppgtt->node.start < dev_priv->gtt.mappable_end) 1036 DRM_DEBUG("Forced to use aperture for PDEs\n"); 1037 1038 ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; 1039 return ret; 1040 } 1041 1042 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) 1043 { 1044 int i; 1045 1046 ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *), 1047 GFP_KERNEL); 1048 1049 if (!ppgtt->pt_pages) 1050 return -ENOMEM; 1051 1052 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1053 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); 1054 if (!ppgtt->pt_pages[i]) { 1055 gen6_ppgtt_free(ppgtt); 1056 return -ENOMEM; 1057 } 1058 } 1059 1060 return 0; 1061 } 1062 1063 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 1064 { 1065 int ret; 1066 1067 ret = gen6_ppgtt_allocate_page_directories(ppgtt); 1068 if (ret) 1069 return ret; 1070 1071 ret = gen6_ppgtt_allocate_page_tables(ppgtt); 1072 if (ret) { 1073 drm_mm_remove_node(&ppgtt->node); 1074 return ret; 1075 } 1076 1077 ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t), 1078 GFP_KERNEL); 1079 if (!ppgtt->pt_dma_addr) { 1080 drm_mm_remove_node(&ppgtt->node); 1081 gen6_ppgtt_free(ppgtt); 1082 return -ENOMEM; 1083 } 1084 1085 return 0; 1086 } 1087 1088 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) 1089 { 1090 struct drm_device *dev = ppgtt->base.dev; 1091 int i; 1092 1093 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1094 dma_addr_t pt_addr; 1095 1096 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096, 1097 PCI_DMA_BIDIRECTIONAL); 1098 1099 if (pci_dma_mapping_error(dev->pdev, pt_addr)) { 1100 gen6_ppgtt_unmap_pages(ppgtt); 1101 return -EIO; 1102 } 1103 1104 ppgtt->pt_dma_addr[i] = pt_addr; 1105 } 1106 1107 return 0; 1108 } 1109 1110 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1111 { 1112 struct drm_device *dev = ppgtt->base.dev; 1113 struct drm_i915_private *dev_priv = dev->dev_private; 1114 int ret; 1115 1116 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; 1117 if (IS_GEN6(dev)) { 1118 ppgtt->switch_mm = gen6_mm_switch; 1119 } else if (IS_HASWELL(dev)) { 1120 ppgtt->switch_mm = hsw_mm_switch; 1121 } else if (IS_GEN7(dev)) { 1122 ppgtt->switch_mm = gen7_mm_switch; 1123 } else 1124 BUG(); 1125 1126 ret = gen6_ppgtt_alloc(ppgtt); 1127 if (ret) 1128 return ret; 1129 1130 ret = gen6_ppgtt_setup_page_tables(ppgtt); 1131 if (ret) { 1132 gen6_ppgtt_free(ppgtt); 1133 return ret; 1134 } 1135 1136 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 1137 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 1138 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 1139 ppgtt->base.start = 0; 1140 ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; 1141 ppgtt->debug_dump = gen6_dump_ppgtt; 1142 1143 ppgtt->pd_offset = 1144 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); 1145 1146 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 1147 1148 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT 
entry: %lx\n", 1149 ppgtt->node.size >> 20, 1150 ppgtt->node.start / PAGE_SIZE); 1151 1152 gen6_write_pdes(ppgtt); 1153 DRM_DEBUG("Adding PPGTT at offset %x\n", 1154 ppgtt->pd_offset << 10); 1155 1156 return 0; 1157 } 1158 1159 static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 1160 { 1161 struct drm_i915_private *dev_priv = dev->dev_private; 1162 1163 ppgtt->base.dev = dev; 1164 ppgtt->base.scratch = dev_priv->gtt.base.scratch; 1165 1166 if (INTEL_INFO(dev)->gen < 8) 1167 return gen6_ppgtt_init(ppgtt); 1168 else 1169 return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); 1170 } 1171 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 1172 { 1173 struct drm_i915_private *dev_priv = dev->dev_private; 1174 int ret = 0; 1175 1176 ret = __hw_ppgtt_init(dev, ppgtt); 1177 if (ret == 0) { 1178 kref_init(&ppgtt->ref); 1179 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, 1180 ppgtt->base.total); 1181 i915_init_vm(dev_priv, &ppgtt->base); 1182 } 1183 1184 return ret; 1185 } 1186 1187 int i915_ppgtt_init_hw(struct drm_device *dev) 1188 { 1189 struct drm_i915_private *dev_priv = dev->dev_private; 1190 struct intel_engine_cs *ring; 1191 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 1192 int i, ret = 0; 1193 1194 /* In the case of execlists, PPGTT is enabled by the context descriptor 1195 * and the PDPs are contained within the context itself. We don't 1196 * need to do anything here. */ 1197 if (i915.enable_execlists) 1198 return 0; 1199 1200 if (!USES_PPGTT(dev)) 1201 return 0; 1202 1203 if (IS_GEN6(dev)) 1204 gen6_ppgtt_enable(dev); 1205 else if (IS_GEN7(dev)) 1206 gen7_ppgtt_enable(dev); 1207 else if (INTEL_INFO(dev)->gen >= 8) 1208 gen8_ppgtt_enable(dev); 1209 else 1210 MISSING_CASE(INTEL_INFO(dev)->gen); 1211 1212 if (ppgtt) { 1213 for_each_ring(ring, dev_priv, i) { 1214 ret = ppgtt->switch_mm(ppgtt, ring); 1215 if (ret != 0) 1216 return ret; 1217 } 1218 } 1219 1220 return ret; 1221 } 1222 struct i915_hw_ppgtt * 1223 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) 1224 { 1225 struct i915_hw_ppgtt *ppgtt; 1226 int ret; 1227 1228 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 1229 if (!ppgtt) 1230 return ERR_PTR(-ENOMEM); 1231 1232 ret = i915_ppgtt_init(dev, ppgtt); 1233 if (ret) { 1234 kfree(ppgtt); 1235 return ERR_PTR(ret); 1236 } 1237 1238 ppgtt->file_priv = fpriv; 1239 1240 trace_i915_ppgtt_create(&ppgtt->base); 1241 1242 return ppgtt; 1243 } 1244 1245 void i915_ppgtt_release(struct kref *kref) 1246 { 1247 struct i915_hw_ppgtt *ppgtt = 1248 container_of(kref, struct i915_hw_ppgtt, ref); 1249 1250 trace_i915_ppgtt_release(&ppgtt->base); 1251 1252 /* vmas should already be unbound */ 1253 WARN_ON(!list_empty(&ppgtt->base.active_list)); 1254 WARN_ON(!list_empty(&ppgtt->base.inactive_list)); 1255 1256 list_del(&ppgtt->base.global_link); 1257 drm_mm_takedown(&ppgtt->base.mm); 1258 1259 ppgtt->base.cleanup(&ppgtt->base); 1260 kfree(ppgtt); 1261 } 1262 1263 static void 1264 ppgtt_bind_vma(struct i915_vma *vma, 1265 enum i915_cache_level cache_level, 1266 u32 flags) 1267 { 1268 /* Currently applicable only to VLV */ 1269 if (vma->obj->gt_ro) 1270 flags |= PTE_READ_ONLY; 1271 1272 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 1273 cache_level, flags); 1274 } 1275 1276 static void ppgtt_unbind_vma(struct i915_vma *vma) 1277 { 1278 vma->vm->clear_range(vma->vm, 1279 vma->node.start, 1280 vma->obj->base.size, 1281 true); 1282 } 1283 1284 extern int intel_iommu_gfx_mapped; 1285 /* Certain Gen5 
chipsets require require idling the GPU before 1286 * unmapping anything from the GTT when VT-d is enabled. 1287 */ 1288 static inline bool needs_idle_maps(struct drm_device *dev) 1289 { 1290 #ifdef CONFIG_INTEL_IOMMU 1291 /* Query intel_iommu to see if we need the workaround. Presumably that 1292 * was loaded first. 1293 */ 1294 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 1295 return true; 1296 #endif 1297 return false; 1298 } 1299 1300 static bool do_idling(struct drm_i915_private *dev_priv) 1301 { 1302 bool ret = dev_priv->mm.interruptible; 1303 1304 if (unlikely(dev_priv->gtt.do_idle_maps)) { 1305 dev_priv->mm.interruptible = false; 1306 if (i915_gpu_idle(dev_priv->dev)) { 1307 DRM_ERROR("Couldn't idle GPU\n"); 1308 /* Wait a bit, in hopes it avoids the hang */ 1309 udelay(10); 1310 } 1311 } 1312 1313 return ret; 1314 } 1315 1316 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 1317 { 1318 if (unlikely(dev_priv->gtt.do_idle_maps)) 1319 dev_priv->mm.interruptible = interruptible; 1320 } 1321 1322 void i915_check_and_clear_faults(struct drm_device *dev) 1323 { 1324 struct drm_i915_private *dev_priv = dev->dev_private; 1325 struct intel_engine_cs *ring; 1326 int i; 1327 1328 if (INTEL_INFO(dev)->gen < 6) 1329 return; 1330 1331 for_each_ring(ring, dev_priv, i) { 1332 u32 fault_reg; 1333 fault_reg = I915_READ(RING_FAULT_REG(ring)); 1334 if (fault_reg & RING_FAULT_VALID) { 1335 DRM_DEBUG_DRIVER("Unexpected fault\n" 1336 "\tAddr: 0x%08lx\n" 1337 "\tAddress space: %s\n" 1338 "\tSource ID: %d\n" 1339 "\tType: %d\n", 1340 fault_reg & PAGE_MASK, 1341 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 1342 RING_FAULT_SRCID(fault_reg), 1343 RING_FAULT_FAULT_TYPE(fault_reg)); 1344 I915_WRITE(RING_FAULT_REG(ring), 1345 fault_reg & ~RING_FAULT_VALID); 1346 } 1347 } 1348 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); 1349 } 1350 1351 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 1352 { 1353 if (INTEL_INFO(dev_priv->dev)->gen < 6) { 1354 intel_gtt_chipset_flush(); 1355 } else { 1356 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1357 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1358 } 1359 } 1360 1361 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 1362 { 1363 struct drm_i915_private *dev_priv = dev->dev_private; 1364 1365 /* Don't bother messing with faults pre GEN6 as we have little 1366 * documentation supporting that it's a good idea. 1367 */ 1368 if (INTEL_INFO(dev)->gen < 6) 1369 return; 1370 1371 i915_check_and_clear_faults(dev); 1372 1373 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1374 dev_priv->gtt.base.start, 1375 dev_priv->gtt.base.total, 1376 true); 1377 1378 i915_ggtt_flush(dev_priv); 1379 } 1380 1381 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 1382 { 1383 struct drm_i915_private *dev_priv = dev->dev_private; 1384 struct drm_i915_gem_object *obj; 1385 struct i915_address_space *vm; 1386 1387 i915_check_and_clear_faults(dev); 1388 1389 /* First fill our portion of the GTT with scratch pages */ 1390 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1391 dev_priv->gtt.base.start, 1392 dev_priv->gtt.base.total, 1393 true); 1394 1395 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1396 struct i915_vma *vma = i915_gem_obj_to_vma(obj, 1397 &dev_priv->gtt.base); 1398 if (!vma) 1399 continue; 1400 1401 i915_gem_clflush_object(obj, obj->pin_display); 1402 /* The bind_vma code tries to be smart about tracking mappings. 
1403 * Unfortunately above, we've just wiped out the mappings 1404 * without telling our object about it. So we need to fake it. 1405 * 1406 * Bind is not expected to fail since this is only called on 1407 * resume and assumption is all requirements exist already. 1408 */ 1409 vma->bound &= ~GLOBAL_BIND; 1410 WARN_ON(i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND)); 1411 } 1412 1413 1414 if (INTEL_INFO(dev)->gen >= 8) { 1415 if (IS_CHERRYVIEW(dev)) 1416 chv_setup_private_ppat(dev_priv); 1417 else 1418 bdw_setup_private_ppat(dev_priv); 1419 1420 return; 1421 } 1422 1423 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 1424 /* TODO: Perhaps it shouldn't be gen6 specific */ 1425 if (i915_is_ggtt(vm)) { 1426 if (dev_priv->mm.aliasing_ppgtt) 1427 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); 1428 continue; 1429 } 1430 1431 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); 1432 } 1433 1434 i915_ggtt_flush(dev_priv); 1435 } 1436 1437 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 1438 { 1439 if (obj->has_dma_mapping) 1440 return 0; 1441 1442 if (!dma_map_sg(&obj->base.dev->pdev->dev, 1443 obj->pages->sgl, obj->pages->nents, 1444 PCI_DMA_BIDIRECTIONAL)) 1445 return -ENOSPC; 1446 1447 return 0; 1448 } 1449 1450 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) 1451 { 1452 #ifdef writeq 1453 writeq(pte, addr); 1454 #else 1455 iowrite32((u32)pte, addr); 1456 iowrite32(pte >> 32, addr + 4); 1457 #endif 1458 } 1459 1460 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 1461 struct sg_table *st, 1462 uint64_t start, 1463 enum i915_cache_level level, u32 unused) 1464 { 1465 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1466 unsigned first_entry = start >> PAGE_SHIFT; 1467 gen8_gtt_pte_t __iomem *gtt_entries = 1468 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1469 int i = 0; 1470 struct sg_page_iter sg_iter; 1471 dma_addr_t addr = 0; /* shut up gcc */ 1472 1473 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1474 addr = sg_dma_address(sg_iter.sg) + 1475 (sg_iter.sg_pgoffset << PAGE_SHIFT); 1476 gen8_set_pte(>t_entries[i], 1477 gen8_pte_encode(addr, level, true)); 1478 i++; 1479 } 1480 1481 /* 1482 * XXX: This serves as a posting read to make sure that the PTE has 1483 * actually been updated. There is some concern that even though 1484 * registers and PTEs are within the same BAR that they are potentially 1485 * of NUMA access patterns. Therefore, even with the way we assume 1486 * hardware should work, we must keep this posting read for paranoia. 1487 */ 1488 if (i != 0) 1489 WARN_ON(readq(>t_entries[i-1]) 1490 != gen8_pte_encode(addr, level, true)); 1491 1492 /* This next bit makes the above posting read even more important. We 1493 * want to flush the TLBs only after we're certain all the PTE updates 1494 * have finished. 1495 */ 1496 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1497 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1498 } 1499 1500 /* 1501 * Binds an object into the global gtt with the specified cache level. The object 1502 * will be accessible to the GPU via commands whose operands reference offsets 1503 * within the global GTT as well as accessible by the GPU through the GMADR 1504 * mapped BAR (dev_priv->mm.gtt->gtt). 
1505 */ 1506 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 1507 struct sg_table *st, 1508 uint64_t start, 1509 enum i915_cache_level level, u32 flags) 1510 { 1511 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1512 unsigned first_entry = start >> PAGE_SHIFT; 1513 gen6_gtt_pte_t __iomem *gtt_entries = 1514 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1515 int i = 0; 1516 struct sg_page_iter sg_iter; 1517 dma_addr_t addr = 0; 1518 1519 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1520 addr = sg_page_iter_dma_address(&sg_iter); 1521 iowrite32(vm->pte_encode(addr, level, true, flags), >t_entries[i]); 1522 i++; 1523 } 1524 1525 /* XXX: This serves as a posting read to make sure that the PTE has 1526 * actually been updated. There is some concern that even though 1527 * registers and PTEs are within the same BAR that they are potentially 1528 * of NUMA access patterns. Therefore, even with the way we assume 1529 * hardware should work, we must keep this posting read for paranoia. 1530 */ 1531 if (i != 0) { 1532 unsigned long gtt = readl(>t_entries[i-1]); 1533 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags)); 1534 } 1535 1536 /* This next bit makes the above posting read even more important. We 1537 * want to flush the TLBs only after we're certain all the PTE updates 1538 * have finished. 1539 */ 1540 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1541 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1542 } 1543 1544 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 1545 uint64_t start, 1546 uint64_t length, 1547 bool use_scratch) 1548 { 1549 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1550 unsigned first_entry = start >> PAGE_SHIFT; 1551 unsigned num_entries = length >> PAGE_SHIFT; 1552 gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = 1553 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1554 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1555 int i; 1556 1557 if (WARN(num_entries > max_entries, 1558 "First entry = %d; Num entries = %d (max=%d)\n", 1559 first_entry, num_entries, max_entries)) 1560 num_entries = max_entries; 1561 1562 scratch_pte = gen8_pte_encode(vm->scratch.addr, 1563 I915_CACHE_LLC, 1564 use_scratch); 1565 for (i = 0; i < num_entries; i++) 1566 gen8_set_pte(>t_base[i], scratch_pte); 1567 readl(gtt_base); 1568 } 1569 1570 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 1571 uint64_t start, 1572 uint64_t length, 1573 bool use_scratch) 1574 { 1575 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1576 unsigned first_entry = start >> PAGE_SHIFT; 1577 unsigned num_entries = length >> PAGE_SHIFT; 1578 gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = 1579 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1580 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1581 int i; 1582 1583 if (WARN(num_entries > max_entries, 1584 "First entry = %d; Num entries = %d (max=%d)\n", 1585 first_entry, num_entries, max_entries)) 1586 num_entries = max_entries; 1587 1588 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0); 1589 1590 for (i = 0; i < num_entries; i++) 1591 iowrite32(scratch_pte, >t_base[i]); 1592 readl(gtt_base); 1593 } 1594 1595 1596 static void i915_ggtt_bind_vma(struct i915_vma *vma, 1597 enum i915_cache_level cache_level, 1598 u32 unused) 1599 { 1600 const unsigned long entry = vma->node.start >> PAGE_SHIFT; 1601 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
1602 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 1603 1604 BUG_ON(!i915_is_ggtt(vma->vm)); 1605 intel_gtt_insert_sg_entries(vma->ggtt_view.pages, entry, flags); 1606 vma->bound = GLOBAL_BIND; 1607 } 1608 1609 static void i915_ggtt_clear_range(struct i915_address_space *vm, 1610 uint64_t start, 1611 uint64_t length, 1612 bool unused) 1613 { 1614 unsigned first_entry = start >> PAGE_SHIFT; 1615 unsigned num_entries = length >> PAGE_SHIFT; 1616 intel_gtt_clear_range(first_entry, num_entries); 1617 } 1618 1619 static void i915_ggtt_unbind_vma(struct i915_vma *vma) 1620 { 1621 const unsigned int first = vma->node.start >> PAGE_SHIFT; 1622 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT; 1623 1624 BUG_ON(!i915_is_ggtt(vma->vm)); 1625 vma->bound = 0; 1626 intel_gtt_clear_range(first, size); 1627 } 1628 1629 static void ggtt_bind_vma(struct i915_vma *vma, 1630 enum i915_cache_level cache_level, 1631 u32 flags) 1632 { 1633 struct drm_device *dev = vma->vm->dev; 1634 struct drm_i915_private *dev_priv = dev->dev_private; 1635 struct drm_i915_gem_object *obj = vma->obj; 1636 1637 /* Currently applicable only to VLV */ 1638 if (obj->gt_ro) 1639 flags |= PTE_READ_ONLY; 1640 1641 /* If there is no aliasing PPGTT, or the caller needs a global mapping, 1642 * or we have a global mapping already but the cacheability flags have 1643 * changed, set the global PTEs. 1644 * 1645 * If there is an aliasing PPGTT it is anecdotally faster, so use that 1646 * instead if none of the above hold true. 1647 * 1648 * NB: A global mapping should only be needed for special regions like 1649 * "gtt mappable", SNB errata, or if specified via special execbuf 1650 * flags. At all other times, the GPU will use the aliasing PPGTT. 1651 */ 1652 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { 1653 if (!(vma->bound & GLOBAL_BIND) || 1654 (cache_level != obj->cache_level)) { 1655 vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, 1656 vma->node.start, 1657 cache_level, flags); 1658 vma->bound |= GLOBAL_BIND; 1659 } 1660 } 1661 1662 if (dev_priv->mm.aliasing_ppgtt && 1663 (!(vma->bound & LOCAL_BIND) || 1664 (cache_level != obj->cache_level))) { 1665 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1666 appgtt->base.insert_entries(&appgtt->base, 1667 vma->ggtt_view.pages, 1668 vma->node.start, 1669 cache_level, flags); 1670 vma->bound |= LOCAL_BIND; 1671 } 1672 } 1673 1674 static void ggtt_unbind_vma(struct i915_vma *vma) 1675 { 1676 struct drm_device *dev = vma->vm->dev; 1677 struct drm_i915_private *dev_priv = dev->dev_private; 1678 struct drm_i915_gem_object *obj = vma->obj; 1679 1680 if (vma->bound & GLOBAL_BIND) { 1681 vma->vm->clear_range(vma->vm, 1682 vma->node.start, 1683 obj->base.size, 1684 true); 1685 vma->bound &= ~GLOBAL_BIND; 1686 } 1687 1688 if (vma->bound & LOCAL_BIND) { 1689 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1690 appgtt->base.clear_range(&appgtt->base, 1691 vma->node.start, 1692 obj->base.size, 1693 true); 1694 vma->bound &= ~LOCAL_BIND; 1695 } 1696 } 1697 1698 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 1699 { 1700 struct drm_device *dev = obj->base.dev; 1701 struct drm_i915_private *dev_priv = dev->dev_private; 1702 bool interruptible; 1703 1704 interruptible = do_idling(dev_priv); 1705 1706 if (!obj->has_dma_mapping) 1707 dma_unmap_sg(&dev->pdev->dev, 1708 obj->pages->sgl, obj->pages->nents, 1709 PCI_DMA_BIDIRECTIONAL); 1710 1711 undo_idling(dev_priv, interruptible); 1712 } 1713 1714 static void i915_gtt_color_adjust(struct 
drm_mm_node *node, 1715 unsigned long color, 1716 unsigned long *start, 1717 unsigned long *end) 1718 { 1719 if (node->color != color) 1720 *start += 4096; 1721 1722 if (!list_empty(&node->node_list)) { 1723 node = list_entry(node->node_list.next, 1724 struct drm_mm_node, 1725 node_list); 1726 if (node->allocated && node->color != color) 1727 *end -= 4096; 1728 } 1729 } 1730 1731 static int i915_gem_setup_global_gtt(struct drm_device *dev, 1732 unsigned long start, 1733 unsigned long mappable_end, 1734 unsigned long end) 1735 { 1736 /* Let GEM Manage all of the aperture. 1737 * 1738 * However, leave one page at the end still bound to the scratch page. 1739 * There are a number of places where the hardware apparently prefetches 1740 * past the end of the object, and we've seen multiple hangs with the 1741 * GPU head pointer stuck in a batchbuffer bound at the last page of the 1742 * aperture. One page should be enough to keep any prefetching inside 1743 * of the aperture. 1744 */ 1745 struct drm_i915_private *dev_priv = dev->dev_private; 1746 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 1747 struct drm_mm_node *entry; 1748 struct drm_i915_gem_object *obj; 1749 unsigned long hole_start, hole_end; 1750 int ret; 1751 1752 BUG_ON(mappable_end > end); 1753 1754 /* Subtract the guard page ... */ 1755 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE); 1756 if (!HAS_LLC(dev)) 1757 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust; 1758 1759 /* Mark any preallocated objects as occupied */ 1760 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1761 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 1762 1763 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", 1764 i915_gem_obj_ggtt_offset(obj), obj->base.size); 1765 1766 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 1767 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); 1768 if (ret) { 1769 DRM_DEBUG_KMS("Reservation failed: %i\n", ret); 1770 return ret; 1771 } 1772 vma->bound |= GLOBAL_BIND; 1773 } 1774 1775 dev_priv->gtt.base.start = start; 1776 dev_priv->gtt.base.total = end - start; 1777 1778 /* Clear any non-preallocated blocks */ 1779 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { 1780 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 1781 hole_start, hole_end); 1782 ggtt_vm->clear_range(ggtt_vm, hole_start, 1783 hole_end - hole_start, true); 1784 } 1785 1786 /* And finally clear the reserved guard page */ 1787 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); 1788 1789 if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { 1790 struct i915_hw_ppgtt *ppgtt; 1791 1792 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 1793 if (!ppgtt) 1794 return -ENOMEM; 1795 1796 ret = __hw_ppgtt_init(dev, ppgtt); 1797 if (ret != 0) 1798 return ret; 1799 1800 dev_priv->mm.aliasing_ppgtt = ppgtt; 1801 } 1802 1803 return 0; 1804 } 1805 1806 void i915_gem_init_global_gtt(struct drm_device *dev) 1807 { 1808 struct drm_i915_private *dev_priv = dev->dev_private; 1809 unsigned long gtt_size, mappable_size; 1810 1811 gtt_size = dev_priv->gtt.base.total; 1812 mappable_size = dev_priv->gtt.mappable_end; 1813 1814 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); 1815 } 1816 1817 void i915_global_gtt_cleanup(struct drm_device *dev) 1818 { 1819 struct drm_i915_private *dev_priv = dev->dev_private; 1820 struct i915_address_space *vm = &dev_priv->gtt.base; 1821 1822 if (dev_priv->mm.aliasing_ppgtt) { 1823 struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; 1824 1825 
ppgtt->base.cleanup(&ppgtt->base); 1826 } 1827 1828 if (drm_mm_initialized(&vm->mm)) { 1829 drm_mm_takedown(&vm->mm); 1830 list_del(&vm->global_link); 1831 } 1832 1833 vm->cleanup(vm); 1834 } 1835 1836 static int setup_scratch_page(struct drm_device *dev) 1837 { 1838 struct drm_i915_private *dev_priv = dev->dev_private; 1839 struct page *page; 1840 dma_addr_t dma_addr; 1841 1842 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); 1843 if (page == NULL) 1844 return -ENOMEM; 1845 set_pages_uc(page, 1); 1846 1847 #ifdef CONFIG_INTEL_IOMMU 1848 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, 1849 PCI_DMA_BIDIRECTIONAL); 1850 if (pci_dma_mapping_error(dev->pdev, dma_addr)) 1851 return -EINVAL; 1852 #else 1853 dma_addr = page_to_phys(page); 1854 #endif 1855 dev_priv->gtt.base.scratch.page = page; 1856 dev_priv->gtt.base.scratch.addr = dma_addr; 1857 1858 return 0; 1859 } 1860 1861 static void teardown_scratch_page(struct drm_device *dev) 1862 { 1863 struct drm_i915_private *dev_priv = dev->dev_private; 1864 struct page *page = dev_priv->gtt.base.scratch.page; 1865 1866 set_pages_wb(page, 1); 1867 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr, 1868 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 1869 __free_page(page); 1870 } 1871 1872 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 1873 { 1874 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 1875 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 1876 return snb_gmch_ctl << 20; 1877 } 1878 1879 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 1880 { 1881 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 1882 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 1883 if (bdw_gmch_ctl) 1884 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 1885 1886 #ifdef CONFIG_X86_32 1887 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 1888 if (bdw_gmch_ctl > 4) 1889 bdw_gmch_ctl = 4; 1890 #endif 1891 1892 return bdw_gmch_ctl << 20; 1893 } 1894 1895 static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 1896 { 1897 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 1898 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 1899 1900 if (gmch_ctrl) 1901 return 1 << (20 + gmch_ctrl); 1902 1903 return 0; 1904 } 1905 1906 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 1907 { 1908 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 1909 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 1910 return snb_gmch_ctl << 25; /* 32 MB units */ 1911 } 1912 1913 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 1914 { 1915 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 1916 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 1917 return bdw_gmch_ctl << 25; /* 32 MB units */ 1918 } 1919 1920 static size_t chv_get_stolen_size(u16 gmch_ctrl) 1921 { 1922 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 1923 gmch_ctrl &= SNB_GMCH_GMS_MASK; 1924 1925 /* 1926 * 0x0 to 0x10: 32MB increments starting at 0MB 1927 * 0x11 to 0x16: 4MB increments starting at 8MB 1928 * 0x17 to 0x1d: 4MB increments start at 36MB 1929 */ 1930 if (gmch_ctrl < 0x11) 1931 return gmch_ctrl << 25; 1932 else if (gmch_ctrl < 0x17) 1933 return (gmch_ctrl - 0x11 + 2) << 22; 1934 else 1935 return (gmch_ctrl - 0x17 + 9) << 22; 1936 } 1937 1938 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) 1939 { 1940 gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 1941 gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; 1942 1943 if (gen9_gmch_ctl < 0xf0) 1944 return gen9_gmch_ctl << 25; /* 32 MB units */ 1945 else 1946 /* 4MB increments starting at 0xf0 for 4MB */ 1947 return (gen9_gmch_ctl - 0xf0 + 1) << 22; 1948 } 1949 1950 static int ggtt_probe_common(struct drm_device *dev, 1951 size_t gtt_size) 1952 { 1953 struct 
static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	if (!USES_PPGTT(dev_priv->dev))
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
		 * so RTL will always use the value corresponding to
		 * pat_sel = 000".
		 * So let's disable cache for GGTT to avoid screen corruptions.
		 * MOCS still can be used though.
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
		 *   before this patch, i.e. the same uncached + snooping access
		 *   like on gen6/7 seems to be in effect.
		 * - So this just fixes blitter/render access. Again it looks
		 *   like it's not just uncached access, but uncached + snooping.
		 *   So we can still hold onto all our assumptions wrt cpu
		 *   clflushing on LLC machines.
		 */
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

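/*
 * Note on both PPAT setup helpers (bdw above, chv below): each
 * GEN8_PPAT(index, value) packs an 8-bit PAT encoding into byte 'index' of
 * the 64-bit value, which is then written out as the two 32-bit
 * GEN8_PRIVATE_PAT registers.
 */
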
2036 */ 2037 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 2038 GEN8_PPAT(1, 0) | 2039 GEN8_PPAT(2, 0) | 2040 GEN8_PPAT(3, 0) | 2041 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 2042 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 2043 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 2044 GEN8_PPAT(7, CHV_PPAT_SNOOP); 2045 2046 I915_WRITE(GEN8_PRIVATE_PAT, pat); 2047 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 2048 } 2049 2050 static int gen8_gmch_probe(struct drm_device *dev, 2051 size_t *gtt_total, 2052 size_t *stolen, 2053 phys_addr_t *mappable_base, 2054 unsigned long *mappable_end) 2055 { 2056 struct drm_i915_private *dev_priv = dev->dev_private; 2057 unsigned int gtt_size; 2058 u16 snb_gmch_ctl; 2059 int ret; 2060 2061 /* TODO: We're not aware of mappable constraints on gen8 yet */ 2062 *mappable_base = pci_resource_start(dev->pdev, 2); 2063 *mappable_end = pci_resource_len(dev->pdev, 2); 2064 2065 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 2066 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 2067 2068 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2069 2070 if (INTEL_INFO(dev)->gen >= 9) { 2071 *stolen = gen9_get_stolen_size(snb_gmch_ctl); 2072 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 2073 } else if (IS_CHERRYVIEW(dev)) { 2074 *stolen = chv_get_stolen_size(snb_gmch_ctl); 2075 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl); 2076 } else { 2077 *stolen = gen8_get_stolen_size(snb_gmch_ctl); 2078 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 2079 } 2080 2081 *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT; 2082 2083 if (IS_CHERRYVIEW(dev)) 2084 chv_setup_private_ppat(dev_priv); 2085 else 2086 bdw_setup_private_ppat(dev_priv); 2087 2088 ret = ggtt_probe_common(dev, gtt_size); 2089 2090 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; 2091 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; 2092 2093 return ret; 2094 } 2095 2096 static int gen6_gmch_probe(struct drm_device *dev, 2097 size_t *gtt_total, 2098 size_t *stolen, 2099 phys_addr_t *mappable_base, 2100 unsigned long *mappable_end) 2101 { 2102 struct drm_i915_private *dev_priv = dev->dev_private; 2103 unsigned int gtt_size; 2104 u16 snb_gmch_ctl; 2105 int ret; 2106 2107 *mappable_base = pci_resource_start(dev->pdev, 2); 2108 *mappable_end = pci_resource_len(dev->pdev, 2); 2109 2110 /* 64/512MB is the current min/max we actually know of, but this is just 2111 * a coarse sanity check. 
2112 */ 2113 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) { 2114 DRM_ERROR("Unknown GMADR size (%lx)\n", 2115 dev_priv->gtt.mappable_end); 2116 return -ENXIO; 2117 } 2118 2119 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 2120 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 2121 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2122 2123 *stolen = gen6_get_stolen_size(snb_gmch_ctl); 2124 2125 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); 2126 *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT; 2127 2128 ret = ggtt_probe_common(dev, gtt_size); 2129 2130 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range; 2131 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries; 2132 2133 return ret; 2134 } 2135 2136 static void gen6_gmch_remove(struct i915_address_space *vm) 2137 { 2138 2139 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base); 2140 2141 iounmap(gtt->gsm); 2142 teardown_scratch_page(vm->dev); 2143 } 2144 2145 static int i915_gmch_probe(struct drm_device *dev, 2146 size_t *gtt_total, 2147 size_t *stolen, 2148 phys_addr_t *mappable_base, 2149 unsigned long *mappable_end) 2150 { 2151 struct drm_i915_private *dev_priv = dev->dev_private; 2152 int ret; 2153 2154 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL); 2155 if (!ret) { 2156 DRM_ERROR("failed to set up gmch\n"); 2157 return -EIO; 2158 } 2159 2160 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end); 2161 2162 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); 2163 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range; 2164 2165 if (unlikely(dev_priv->gtt.do_idle_maps)) 2166 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 2167 2168 return 0; 2169 } 2170 2171 static void i915_gmch_remove(struct i915_address_space *vm) 2172 { 2173 intel_gmch_remove(); 2174 } 2175 2176 int i915_gem_gtt_init(struct drm_device *dev) 2177 { 2178 struct drm_i915_private *dev_priv = dev->dev_private; 2179 struct i915_gtt *gtt = &dev_priv->gtt; 2180 int ret; 2181 2182 if (INTEL_INFO(dev)->gen <= 5) { 2183 gtt->gtt_probe = i915_gmch_probe; 2184 gtt->base.cleanup = i915_gmch_remove; 2185 } else if (INTEL_INFO(dev)->gen < 8) { 2186 gtt->gtt_probe = gen6_gmch_probe; 2187 gtt->base.cleanup = gen6_gmch_remove; 2188 if (IS_HASWELL(dev) && dev_priv->ellc_size) 2189 gtt->base.pte_encode = iris_pte_encode; 2190 else if (IS_HASWELL(dev)) 2191 gtt->base.pte_encode = hsw_pte_encode; 2192 else if (IS_VALLEYVIEW(dev)) 2193 gtt->base.pte_encode = byt_pte_encode; 2194 else if (INTEL_INFO(dev)->gen >= 7) 2195 gtt->base.pte_encode = ivb_pte_encode; 2196 else 2197 gtt->base.pte_encode = snb_pte_encode; 2198 } else { 2199 dev_priv->gtt.gtt_probe = gen8_gmch_probe; 2200 dev_priv->gtt.base.cleanup = gen6_gmch_remove; 2201 } 2202 2203 ret = gtt->gtt_probe(dev, >t->base.total, >t->stolen_size, 2204 >t->mappable_base, >t->mappable_end); 2205 if (ret) 2206 return ret; 2207 2208 gtt->base.dev = dev; 2209 2210 /* GMADR is the PCI mmio aperture into the global GTT. 
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		gtt->gtt_probe = gen8_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities. We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm,
					      const struct i915_ggtt_view *view)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);

	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;
	vma->ggtt_view = *view;

	if (INTEL_INFO(vm->dev)->gen >= 6) {
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
	} else {
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
	}

	list_add_tail(&vma->vma_link, &obj->vma_list);
	if (!i915_is_ggtt(vm))
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma_view(struct drm_i915_gem_object *obj,
				       struct i915_address_space *vm,
				       const struct i915_ggtt_view *view)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma_view(obj, vm, view);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm, view);

	return vma;
}

static inline
int i915_get_vma_pages(struct i915_vma *vma)
{
	if (vma->ggtt_view.pages)
		return 0;

	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
		vma->ggtt_view.pages = vma->obj->pages;
	else
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);

	if (!vma->ggtt_view.pages) {
		DRM_ERROR("Failed to get pages for VMA view type %u!\n",
			  vma->ggtt_view.type);
		return -EINVAL;
	}

	return 0;
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @cache_level: mapping cache level
 * @flags: flags like global or local mapping
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and the PTEs are set up
 * accordingly. Note that the DMA addresses are the only part of the SG table
 * we care about here.
 */
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
		  u32 flags)
{
	int ret = i915_get_vma_pages(vma);

	if (ret)
		return ret;

	vma->bind_vma(vma, cache_level, flags);

	return 0;
}

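/*
 * Illustrative sketch only (not a call site taken from this file): a caller
 * that wants an object mapped through the default GGTT view would do roughly
 *
 *	vma = i915_gem_obj_lookup_or_create_vma_view(obj, &dev_priv->gtt.base,
 *						      &i915_ggtt_view_normal);
 *	if (!IS_ERR(vma))
 *		ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
 *
 * This omits the address space reservation for vma->node and the pinning done
 * by real callers; it is only meant to show how the lookup/create and bind
 * helpers above fit together.
 */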