/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);

bool intel_enable_ppgtt(struct drm_device *dev, bool full)
{
	if (i915.enable_ppgtt == 0)
		return false;

	if (i915.enable_ppgtt == 1 && full)
		return false;

	return true;
}

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && HAS_PPGTT(dev))
		return 2;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
	    dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	return HAS_ALIASING_PPGTT(dev) ? 1 : 0;
}


static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);

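/*
 * Gen8 PTE/PDE encoding: the caching behaviour is selected indirectly via an
 * index into the private PPAT programmed by bdw/chv_setup_private_ppat(),
 * rather than by dedicated cacheability bits in the entry itself.
 */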
static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 flags)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

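/*
 * Haswell PTE encoding variant used when eLLC is present (see the pte_encode
 * selection in i915_gem_gtt_init()).
 */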
static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid, u32 unused)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
			  uint64_t val, bool synchronous)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	int i, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr, synchronous);
		if (ret)
			return ret;
	}

	return 0;
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);

		pte = 0;
		if (++pde == GEN8_PDES_PER_PAGE) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 unused)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;

	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
			break;

		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);

		pt_vaddr[pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++pte == GEN8_PTES_PER_PAGE) {
			if (!HAS_LLC(ppgtt->base.dev))
				drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == GEN8_PDES_PER_PAGE) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}
	if (pt_vaddr) {
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
		kunmap_atomic(pt_vaddr);
	}
}

static void gen8_free_page_tables(struct page **pt_pages)
{
	int i;

	if (pt_pages == NULL)
		return;

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
		if (pt_pages[i])
			__free_pages(pt_pages[i], 0);
}

static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
	int i, j;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		/* TODO: In the future we'll support sparse mappings, so this
		 * will have to change. */
		if (!ppgtt->pd_dma_addr[i])
			continue;

		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
			       PCI_DMA_BIDIRECTIONAL);

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			if (addr)
				pci_unmap_page(hwdev, addr, PAGE_SIZE,
					       PCI_DMA_BIDIRECTIONAL);
		}
	}
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&vm->mm);

	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
}

static struct page **__gen8_alloc_page_tables(void)
{
	struct page **pt_pages;
	int i;

	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
	if (!pt_pages)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
		pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!pt_pages[i])
			goto bail;
	}

	return pt_pages;

bail:
	gen8_free_page_tables(pt_pages);
	kfree(pt_pages);
	return ERR_PTR(-ENOMEM);
}

static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
					   const int max_pdp)
{
	struct page **pt_pages[GEN8_LEGACY_PDPS];
	int i, ret;

	for (i = 0; i < max_pdp; i++) {
		pt_pages[i] = __gen8_alloc_page_tables();
		if (IS_ERR(pt_pages[i])) {
			ret = PTR_ERR(pt_pages[i]);
			goto unwind_out;
		}
	}

	/* NB: Avoid touching gen8_pt_pages until last to keep the allocation,
	 * "atomic" - for cleanup purposes.
	 */
	for (i = 0; i < max_pdp; i++)
		ppgtt->gen8_pt_pages[i] = pt_pages[i];

	return 0;

unwind_out:
	while (i--) {
		gen8_free_page_tables(pt_pages[i]);
		kfree(pt_pages[i]);
	}

	return ret;
}

static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
						     sizeof(dma_addr_t),
						     GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			return -ENOMEM;
	}

	return 0;
}

static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
						const int max_pdp)
{
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	return 0;
}

static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
			    const int max_pdp)
{
	int ret;

	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
	if (ret)
		return ret;

	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
	if (ret) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return ret;
	}

	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;

	ret = gen8_ppgtt_allocate_dma(ppgtt);
	if (ret)
		gen8_ppgtt_free(ppgtt);

	return ret;
}

static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
					     const int pd)
{
	dma_addr_t pd_addr;
	int ret;

	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
			       &ppgtt->pd_pages[pd], 0,
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
	if (ret)
		return ret;

	ppgtt->pd_dma_addr[pd] = pd_addr;

	return 0;
}

static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
					const int pd,
					const int pt)
{
	dma_addr_t pt_addr;
	struct page *p;
	int ret;

	p = ppgtt->gen8_pt_pages[pd][pt];
	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
	if (ret)
		return ret;

	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;

	return 0;
}

/**
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * FIXME: split allocation into smaller pieces. For now we only ever do this
 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 * TODO: Do something with the size parameter
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
	int i, j, ret;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

	/* 1. Do all our allocations for page directories and page tables. */
	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
	if (ret)
		return ret;

	/*
	 * 2. Create DMA mappings for the page directories and page tables.
	 */
	for (i = 0; i < max_pdp; i++) {
		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
		if (ret)
			goto bail;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
			if (ret)
				goto bail;
		}
	}

	/*
	 * 3. Map all the page directory entries to point to the page tables
	 * we've allocated.
	 *
	 * For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again.
	 */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		if (!HAS_LLC(ppgtt->base.dev))
			drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->switch_mm = gen8_mm_switch;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
			 ppgtt->num_pd_entries,
			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
	return 0;

bail:
	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct i915_address_space *vm = &ppgtt->base;
	gen6_gtt_pte_t __iomem *pd_addr;
	gen6_gtt_pte_t scratch_pte;
	uint32_t pd_entry;
	int pte, pde;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);

	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);

	seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
		u32 expected;
		gen6_gtt_pte_t *pt_vaddr;
		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
		pd_entry = readl(pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, " SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd_offset & 0x3f);

	return (ppgtt->pd_offset / 64) << 16;
}

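/*
 * Point the ring's PP_DIR_BASE at this PPGTT's page directory, either by
 * direct MMIO (during reset / synchronous setup) or via LRI commands emitted
 * on the ring.
 */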
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_engine_cs *ring,
			 bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_engine_cs *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!synchronous)
		return 0;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

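/*
 * Turn on per-ring PPGTT (GFX_PPGTT_ENABLE in GFX_MODE). For aliasing PPGTT
 * the page directory is loaded here as well; with full PPGTT the switch is
 * deferred and done later via switch_mm.
 */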
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j, ret;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			goto err_out;
	}

	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		int ret;
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, gab_ctl, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

	for_each_ring(ring, dev_priv, i) {
		int ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true, flags);

		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
}

static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&ppgtt->base.mm);
	drm_mm_remove_node(&ppgtt->node);

	gen6_ppgtt_unmap_pages(ppgtt);
	gen6_ppgtt_free(ppgtt);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_TOPDOWN);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE,
					       0, dev_priv->gtt.base.total,
					       0);
		if (ret)
			return ret;

		retried = true;
		goto alloc;
	}

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	return ret;
}

static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);

	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i]) {
			gen6_ppgtt_free(ppgtt);
			return -ENOMEM;
		}
	}

	return 0;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int ret;

	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
	if (ret) {
		drm_mm_remove_node(&ppgtt->node);
		return ret;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr) {
		drm_mm_remove_node(&ppgtt->node);
		gen6_ppgtt_free(ppgtt);
		return -ENOMEM;
	}

	return 0;
}

static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	int i;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			gen6_ppgtt_unmap_pages(ppgtt);
			return -EIO;
		}

		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	return 0;
}

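/*
 * Set up a gen6/7 PPGTT: pick the enable/switch_mm callbacks for the
 * platform, allocate and map the page tables, and derive pd_offset from
 * where the PDEs landed in the GGTT.
 */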
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->enable = gen6_ppgtt_enable;
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_setup_page_tables(ppgtt);
	if (ret) {
		gen6_ppgtt_free(ppgtt);
		return ret;
	}

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	return 0;
}

int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ppgtt->base.dev = dev;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (!ret) {
		struct drm_i915_private *dev_priv = dev->dev_private;
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
		if (INTEL_INFO(dev)->gen < 8) {
			gen6_write_pdes(ppgtt);
			DRM_DEBUG("Adding PPGTT at offset %x\n",
				  ppgtt->pd_offset << 10);
		}
	}

	return ret;
}

static void
ppgtt_bind_vma(struct i915_vma *vma,
	       enum i915_cache_level cache_level,
	       u32 flags)
{
	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, flags);
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
							   &dev_priv->gtt.base);
		if (!vma)
			continue;

		i915_gem_clflush_object(obj, obj->pin_display);
		/* The bind_vma code tries to be smart about tracking mappings.
		 * Unfortunately above, we've just wiped out the mappings
		 * without telling our object about it. So we need to fake it.
		 */
		obj->has_global_gtt_mapping = 0;
		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
	}


	if (INTEL_INFO(dev)->gen >= 8) {
		if (IS_CHERRYVIEW(dev))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		/* TODO: Perhaps it shouldn't be gen6 specific */
		if (i915_is_ggtt(vm)) {
			if (dev_priv->mm.aliasing_ppgtt)
				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
			continue;
		}

		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

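/*
 * Pre-gen6 GGTT binding goes through the intel-gtt (AGP) helpers instead of
 * writing PTEs directly.
 */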
static void i915_ggtt_bind_vma(struct i915_vma *vma,
			       enum i915_cache_level cache_level,
			       u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	BUG_ON(!i915_is_ggtt(vma->vm));
	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
	vma->obj->has_global_gtt_mapping = 1;
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	intel_gtt_clear_range(first_entry, num_entries);
}

static void i915_ggtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;

	BUG_ON(!i915_is_ggtt(vma->vm));
	vma->obj->has_global_gtt_mapping = 0;
	intel_gtt_clear_range(first, size);
}

static void ggtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		flags |= PTE_READ_ONLY;

	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
	 * or we have a global mapping already but the cacheability flags have
	 * changed, set the global PTEs.
	 *
	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
	 * instead if none of the above hold true.
	 *
	 * NB: A global mapping should only be needed for special regions like
	 * "gtt mappable", SNB errata, or if specified via special execbuf
	 * flags. At all other times, the GPU will use the aliasing PPGTT.
	 */
	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		if (!obj->has_global_gtt_mapping ||
		    (cache_level != obj->cache_level)) {
			vma->vm->insert_entries(vma->vm, obj->pages,
						vma->node.start,
						cache_level, flags);
			obj->has_global_gtt_mapping = 1;
		}
	}

	if (dev_priv->mm.aliasing_ppgtt &&
	    (!obj->has_aliasing_ppgtt_mapping ||
	     (cache_level != obj->cache_level))) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base,
					    vma->obj->pages,
					    vma->node.start,
					    cache_level, flags);
		vma->obj->has_aliasing_ppgtt_mapping = 1;
	}
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->has_global_gtt_mapping) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     obj->base.size,
				     true);
		obj->has_global_gtt_mapping = 0;
	}

	if (obj->has_aliasing_ppgtt_mapping) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 obj->base.size,
					 true);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

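/*
 * The scratch page backs every PTE that gets cleared, so stray accesses to
 * unbound ranges hit a harmless page instead of random memory.
 */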
static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0  to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments start at 36MB
	 */
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * Note that the hardware enforces snooping for all page
	 * table accesses. The snoop bit is actually ignored for
	 * PDEs.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
	intel_gmch_remove();
}
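/*
 * Overview of the dispatch in i915_gem_gtt_init() below: gen2-5 go through
 * the intel-gtt bridge code (i915_gmch_probe/i915_gmch_remove), gen6-7 use
 * gen6_gmch_probe() plus a per-platform pte_encode hook, and gen8+ uses
 * gen8_gmch_probe() while reusing gen6_gmch_remove(), since the GSM unmap
 * and scratch page teardown are the same.
 */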
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		gtt->gtt_probe = gen8_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities. We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}
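/*
 * Usage sketch (illustrative only, not a caller in this file; "flags" is a
 * placeholder for whatever bind flags the caller needs): code that wants an
 * object mapped into a particular address space does roughly
 *
 *	struct i915_vma *vma;
 *
 *	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	vma->bind_vma(vma, obj->cache_level, flags);
 *
 * The per-gen bind_vma/unbind_vma hooks installed in __i915_gem_vma_create()
 * are what actually write the GGTT or PPGTT entries.
 */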