1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <drm/drmP.h> 28 #include <drm/i915_drm.h> 29 #include "i915_drv.h" 30 #include "i915_trace.h" 31 #include "intel_drv.h" 32 33 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv); 34 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv); 35 36 bool intel_enable_ppgtt(struct drm_device *dev, bool full) 37 { 38 if (i915.enable_ppgtt == 0) 39 return false; 40 41 if (i915.enable_ppgtt == 1 && full) 42 return false; 43 44 return true; 45 } 46 47 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) 48 { 49 if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) 50 return 0; 51 52 if (enable_ppgtt == 1) 53 return 1; 54 55 if (enable_ppgtt == 2 && HAS_PPGTT(dev)) 56 return 2; 57 58 #ifdef CONFIG_INTEL_IOMMU 59 /* Disable ppgtt on SNB if VT-d is on. */ 60 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { 61 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 62 return 0; 63 } 64 #endif 65 66 /* Early VLV doesn't have this */ 67 if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) && 68 dev->pdev->revision < 0xb) { 69 DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n"); 70 return 0; 71 } 72 73 return HAS_ALIASING_PPGTT(dev) ? 1 : 0; 74 } 75 76 77 static void ppgtt_bind_vma(struct i915_vma *vma, 78 enum i915_cache_level cache_level, 79 u32 flags); 80 static void ppgtt_unbind_vma(struct i915_vma *vma); 81 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt); 82 83 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, 84 enum i915_cache_level level, 85 bool valid) 86 { 87 gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; 88 pte |= addr; 89 90 switch (level) { 91 case I915_CACHE_NONE: 92 pte |= PPAT_UNCACHED_INDEX; 93 break; 94 case I915_CACHE_WT: 95 pte |= PPAT_DISPLAY_ELLC_INDEX; 96 break; 97 default: 98 pte |= PPAT_CACHED_INDEX; 99 break; 100 } 101 102 return pte; 103 } 104 105 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev, 106 dma_addr_t addr, 107 enum i915_cache_level level) 108 { 109 gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 110 pde |= addr; 111 if (level != I915_CACHE_NONE) 112 pde |= PPAT_CACHED_PDE_INDEX; 113 else 114 pde |= PPAT_UNCACHED_INDEX; 115 return pde; 116 } 117 118 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, 119 enum i915_cache_level level, 120 bool valid, u32 unused) 121 { 122 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 123 pte |= GEN6_PTE_ADDR_ENCODE(addr); 124 125 switch (level) { 126 case I915_CACHE_L3_LLC: 127 case I915_CACHE_LLC: 128 pte |= GEN6_PTE_CACHE_LLC; 129 break; 130 case I915_CACHE_NONE: 131 pte |= GEN6_PTE_UNCACHED; 132 break; 133 default: 134 WARN_ON(1); 135 } 136 137 return pte; 138 } 139 140 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, 141 enum i915_cache_level level, 142 bool valid, u32 unused) 143 { 144 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 145 pte |= GEN6_PTE_ADDR_ENCODE(addr); 146 147 switch (level) { 148 case I915_CACHE_L3_LLC: 149 pte |= GEN7_PTE_CACHE_L3_LLC; 150 break; 151 case I915_CACHE_LLC: 152 pte |= GEN6_PTE_CACHE_LLC; 153 break; 154 case I915_CACHE_NONE: 155 pte |= GEN6_PTE_UNCACHED; 156 break; 157 default: 158 WARN_ON(1); 159 } 160 161 return pte; 162 } 163 164 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, 165 enum i915_cache_level level, 166 bool valid, u32 flags) 167 { 168 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 169 pte |= GEN6_PTE_ADDR_ENCODE(addr); 170 171 /* Mark the page as writeable. Other platforms don't have a 172 * setting for read-only/writable, so this matches that behavior. 173 */ 174 if (!(flags & PTE_READ_ONLY)) 175 pte |= BYT_PTE_WRITEABLE; 176 177 if (level != I915_CACHE_NONE) 178 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 179 180 return pte; 181 } 182 183 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, 184 enum i915_cache_level level, 185 bool valid, u32 unused) 186 { 187 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 188 pte |= HSW_PTE_ADDR_ENCODE(addr); 189 190 if (level != I915_CACHE_NONE) 191 pte |= HSW_WB_LLC_AGE3; 192 193 return pte; 194 } 195 196 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, 197 enum i915_cache_level level, 198 bool valid, u32 unused) 199 { 200 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 201 pte |= HSW_PTE_ADDR_ENCODE(addr); 202 203 switch (level) { 204 case I915_CACHE_NONE: 205 break; 206 case I915_CACHE_WT: 207 pte |= HSW_WT_ELLC_LLC_AGE3; 208 break; 209 default: 210 pte |= HSW_WB_ELLC_LLC_AGE3; 211 break; 212 } 213 214 return pte; 215 } 216 217 /* Broadwell Page Directory Pointer Descriptors */ 218 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, 219 uint64_t val, bool synchronous) 220 { 221 struct drm_i915_private *dev_priv = ring->dev->dev_private; 222 int ret; 223 224 BUG_ON(entry >= 4); 225 226 if (synchronous) { 227 I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); 228 I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); 229 return 0; 230 } 231 232 ret = intel_ring_begin(ring, 6); 233 if (ret) 234 return ret; 235 236 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 237 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry)); 238 intel_ring_emit(ring, (u32)(val >> 32)); 239 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 240 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry)); 241 intel_ring_emit(ring, (u32)(val)); 242 intel_ring_advance(ring); 243 244 return 0; 245 } 246 247 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, 248 struct intel_engine_cs *ring, 249 bool synchronous) 250 { 251 int i, ret; 252 253 /* bit of a hack to find the actual last used pd */ 254 int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; 255 256 for (i = used_pd - 1; i >= 0; i--) { 257 dma_addr_t addr = ppgtt->pd_dma_addr[i]; 258 ret = gen8_write_pdp(ring, i, addr, synchronous); 259 if (ret) 260 return ret; 261 } 262 263 return 0; 264 } 265 266 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 267 uint64_t start, 268 uint64_t length, 269 bool use_scratch) 270 { 271 struct i915_hw_ppgtt *ppgtt = 272 container_of(vm, struct i915_hw_ppgtt, base); 273 gen8_gtt_pte_t *pt_vaddr, scratch_pte; 274 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 275 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 276 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 277 unsigned num_entries = length >> PAGE_SHIFT; 278 unsigned last_pte, i; 279 280 scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, 281 I915_CACHE_LLC, use_scratch); 282 283 while (num_entries) { 284 struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde]; 285 286 last_pte = pte + num_entries; 287 if (last_pte > GEN8_PTES_PER_PAGE) 288 last_pte = GEN8_PTES_PER_PAGE; 289 290 pt_vaddr = kmap_atomic(page_table); 291 292 for (i = pte; i < last_pte; i++) { 293 pt_vaddr[i] = scratch_pte; 294 num_entries--; 295 } 296 297 if (!HAS_LLC(ppgtt->base.dev)) 298 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 299 kunmap_atomic(pt_vaddr); 300 301 pte = 0; 302 if (++pde == GEN8_PDES_PER_PAGE) { 303 pdpe++; 304 pde = 0; 305 } 306 } 307 } 308 309 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 310 struct sg_table *pages, 311 uint64_t start, 312 enum i915_cache_level cache_level, u32 unused) 313 { 314 struct i915_hw_ppgtt *ppgtt = 315 container_of(vm, struct i915_hw_ppgtt, base); 316 gen8_gtt_pte_t *pt_vaddr; 317 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 318 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 319 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 320 struct sg_page_iter sg_iter; 321 322 pt_vaddr = NULL; 323 324 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 325 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS)) 326 break; 327 328 if (pt_vaddr == NULL) 329 pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]); 330 331 pt_vaddr[pte] = 332 gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), 333 cache_level, true); 334 if (++pte == GEN8_PTES_PER_PAGE) { 335 if (!HAS_LLC(ppgtt->base.dev)) 336 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 337 kunmap_atomic(pt_vaddr); 338 pt_vaddr = NULL; 339 if (++pde == GEN8_PDES_PER_PAGE) { 340 pdpe++; 341 pde = 0; 342 } 343 pte = 0; 344 } 345 } 346 if (pt_vaddr) { 347 if (!HAS_LLC(ppgtt->base.dev)) 348 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 349 kunmap_atomic(pt_vaddr); 350 } 351 } 352 353 static void gen8_free_page_tables(struct page **pt_pages) 354 { 355 int i; 356 357 if (pt_pages == NULL) 358 return; 359 360 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) 361 if (pt_pages[i]) 362 __free_pages(pt_pages[i], 0); 363 } 364 365 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt) 366 { 367 int i; 368 369 for (i = 0; i < ppgtt->num_pd_pages; i++) { 370 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]); 371 kfree(ppgtt->gen8_pt_pages[i]); 372 kfree(ppgtt->gen8_pt_dma_addr[i]); 373 } 374 375 __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT)); 376 } 377 378 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 379 { 380 struct pci_dev *hwdev = ppgtt->base.dev->pdev; 381 int i, j; 382 383 for (i = 0; i < ppgtt->num_pd_pages; i++) { 384 /* TODO: In the future we'll support sparse mappings, so this 385 * will have to change. */ 386 if (!ppgtt->pd_dma_addr[i]) 387 continue; 388 389 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, 390 PCI_DMA_BIDIRECTIONAL); 391 392 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 393 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 394 if (addr) 395 pci_unmap_page(hwdev, addr, PAGE_SIZE, 396 PCI_DMA_BIDIRECTIONAL); 397 } 398 } 399 } 400 401 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 402 { 403 struct i915_hw_ppgtt *ppgtt = 404 container_of(vm, struct i915_hw_ppgtt, base); 405 406 list_del(&vm->global_link); 407 drm_mm_takedown(&vm->mm); 408 409 gen8_ppgtt_unmap_pages(ppgtt); 410 gen8_ppgtt_free(ppgtt); 411 } 412 413 static struct page **__gen8_alloc_page_tables(void) 414 { 415 struct page **pt_pages; 416 int i; 417 418 pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL); 419 if (!pt_pages) 420 return ERR_PTR(-ENOMEM); 421 422 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { 423 pt_pages[i] = alloc_page(GFP_KERNEL); 424 if (!pt_pages[i]) 425 goto bail; 426 } 427 428 return pt_pages; 429 430 bail: 431 gen8_free_page_tables(pt_pages); 432 kfree(pt_pages); 433 return ERR_PTR(-ENOMEM); 434 } 435 436 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, 437 const int max_pdp) 438 { 439 struct page **pt_pages[GEN8_LEGACY_PDPS]; 440 int i, ret; 441 442 for (i = 0; i < max_pdp; i++) { 443 pt_pages[i] = __gen8_alloc_page_tables(); 444 if (IS_ERR(pt_pages[i])) { 445 ret = PTR_ERR(pt_pages[i]); 446 goto unwind_out; 447 } 448 } 449 450 /* NB: Avoid touching gen8_pt_pages until last to keep the allocation, 451 * "atomic" - for cleanup purposes. 452 */ 453 for (i = 0; i < max_pdp; i++) 454 ppgtt->gen8_pt_pages[i] = pt_pages[i]; 455 456 return 0; 457 458 unwind_out: 459 while (i--) { 460 gen8_free_page_tables(pt_pages[i]); 461 kfree(pt_pages[i]); 462 } 463 464 return ret; 465 } 466 467 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) 468 { 469 int i; 470 471 for (i = 0; i < ppgtt->num_pd_pages; i++) { 472 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, 473 sizeof(dma_addr_t), 474 GFP_KERNEL); 475 if (!ppgtt->gen8_pt_dma_addr[i]) 476 return -ENOMEM; 477 } 478 479 return 0; 480 } 481 482 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, 483 const int max_pdp) 484 { 485 ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); 486 if (!ppgtt->pd_pages) 487 return -ENOMEM; 488 489 ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); 490 BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); 491 492 return 0; 493 } 494 495 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, 496 const int max_pdp) 497 { 498 int ret; 499 500 ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp); 501 if (ret) 502 return ret; 503 504 ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp); 505 if (ret) { 506 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); 507 return ret; 508 } 509 510 ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; 511 512 ret = gen8_ppgtt_allocate_dma(ppgtt); 513 if (ret) 514 gen8_ppgtt_free(ppgtt); 515 516 return ret; 517 } 518 519 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, 520 const int pd) 521 { 522 dma_addr_t pd_addr; 523 int ret; 524 525 pd_addr = pci_map_page(ppgtt->base.dev->pdev, 526 &ppgtt->pd_pages[pd], 0, 527 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 528 529 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); 530 if (ret) 531 return ret; 532 533 ppgtt->pd_dma_addr[pd] = pd_addr; 534 535 return 0; 536 } 537 538 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, 539 const int pd, 540 const int pt) 541 { 542 dma_addr_t pt_addr; 543 struct page *p; 544 int ret; 545 546 p = ppgtt->gen8_pt_pages[pd][pt]; 547 pt_addr = pci_map_page(ppgtt->base.dev->pdev, 548 p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 549 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr); 550 if (ret) 551 return ret; 552 553 ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr; 554 555 return 0; 556 } 557 558 /** 559 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 560 * with a net effect resembling a 2-level page table in normal x86 terms. Each 561 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 562 * space. 563 * 564 * FIXME: split allocation into smaller pieces. For now we only ever do this 565 * once, but with full PPGTT, the multiple contiguous allocations will be bad. 566 * TODO: Do something with the size parameter 567 */ 568 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) 569 { 570 const int max_pdp = DIV_ROUND_UP(size, 1 << 30); 571 const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; 572 int i, j, ret; 573 574 if (size % (1<<30)) 575 DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); 576 577 /* 1. Do all our allocations for page directories and page tables. */ 578 ret = gen8_ppgtt_alloc(ppgtt, max_pdp); 579 if (ret) 580 return ret; 581 582 /* 583 * 2. Create DMA mappings for the page directories and page tables. 584 */ 585 for (i = 0; i < max_pdp; i++) { 586 ret = gen8_ppgtt_setup_page_directories(ppgtt, i); 587 if (ret) 588 goto bail; 589 590 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 591 ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j); 592 if (ret) 593 goto bail; 594 } 595 } 596 597 /* 598 * 3. Map all the page directory entires to point to the page tables 599 * we've allocated. 600 * 601 * For now, the PPGTT helper functions all require that the PDEs are 602 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we 603 * will never need to touch the PDEs again. 604 */ 605 for (i = 0; i < max_pdp; i++) { 606 gen8_ppgtt_pde_t *pd_vaddr; 607 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); 608 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 609 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 610 pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, 611 I915_CACHE_LLC); 612 } 613 if (!HAS_LLC(ppgtt->base.dev)) 614 drm_clflush_virt_range(pd_vaddr, PAGE_SIZE); 615 kunmap_atomic(pd_vaddr); 616 } 617 618 ppgtt->enable = gen8_ppgtt_enable; 619 ppgtt->switch_mm = gen8_mm_switch; 620 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 621 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 622 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 623 ppgtt->base.start = 0; 624 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE; 625 626 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 627 628 DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", 629 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); 630 DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", 631 ppgtt->num_pd_entries, 632 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30)); 633 return 0; 634 635 bail: 636 gen8_ppgtt_unmap_pages(ppgtt); 637 gen8_ppgtt_free(ppgtt); 638 return ret; 639 } 640 641 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 642 { 643 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 644 struct i915_address_space *vm = &ppgtt->base; 645 gen6_gtt_pte_t __iomem *pd_addr; 646 gen6_gtt_pte_t scratch_pte; 647 uint32_t pd_entry; 648 int pte, pde; 649 650 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); 651 652 pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + 653 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 654 655 seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm, 656 ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries); 657 for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { 658 u32 expected; 659 gen6_gtt_pte_t *pt_vaddr; 660 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde]; 661 pd_entry = readl(pd_addr + pde); 662 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 663 664 if (pd_entry != expected) 665 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 666 pde, 667 pd_entry, 668 expected); 669 seq_printf(m, "\tPDE: %x\n", pd_entry); 670 671 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); 672 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { 673 unsigned long va = 674 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + 675 (pte * PAGE_SIZE); 676 int i; 677 bool found = false; 678 for (i = 0; i < 4; i++) 679 if (pt_vaddr[pte + i] != scratch_pte) 680 found = true; 681 if (!found) 682 continue; 683 684 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 685 for (i = 0; i < 4; i++) { 686 if (pt_vaddr[pte + i] != scratch_pte) 687 seq_printf(m, " %08x", pt_vaddr[pte + i]); 688 else 689 seq_puts(m, " SCRATCH "); 690 } 691 seq_puts(m, "\n"); 692 } 693 kunmap_atomic(pt_vaddr); 694 } 695 } 696 697 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) 698 { 699 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 700 gen6_gtt_pte_t __iomem *pd_addr; 701 uint32_t pd_entry; 702 int i; 703 704 WARN_ON(ppgtt->pd_offset & 0x3f); 705 pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm + 706 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 707 for (i = 0; i < ppgtt->num_pd_entries; i++) { 708 dma_addr_t pt_addr; 709 710 pt_addr = ppgtt->pt_dma_addr[i]; 711 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 712 pd_entry |= GEN6_PDE_VALID; 713 714 writel(pd_entry, pd_addr + i); 715 } 716 readl(pd_addr); 717 } 718 719 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 720 { 721 BUG_ON(ppgtt->pd_offset & 0x3f); 722 723 return (ppgtt->pd_offset / 64) << 16; 724 } 725 726 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 727 struct intel_engine_cs *ring, 728 bool synchronous) 729 { 730 struct drm_device *dev = ppgtt->base.dev; 731 struct drm_i915_private *dev_priv = dev->dev_private; 732 int ret; 733 734 /* If we're in reset, we can assume the GPU is sufficiently idle to 735 * manually frob these bits. Ideally we could use the ring functions, 736 * except our error handling makes it quite difficult (can't use 737 * intel_ring_begin, ring->flush, or intel_ring_advance) 738 * 739 * FIXME: We should try not to special case reset 740 */ 741 if (synchronous || 742 i915_reset_in_progress(&dev_priv->gpu_error)) { 743 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); 744 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 745 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 746 POSTING_READ(RING_PP_DIR_BASE(ring)); 747 return 0; 748 } 749 750 /* NB: TLBs must be flushed and invalidated before a switch */ 751 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 752 if (ret) 753 return ret; 754 755 ret = intel_ring_begin(ring, 6); 756 if (ret) 757 return ret; 758 759 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 760 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 761 intel_ring_emit(ring, PP_DIR_DCLV_2G); 762 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 763 intel_ring_emit(ring, get_pd_offset(ppgtt)); 764 intel_ring_emit(ring, MI_NOOP); 765 intel_ring_advance(ring); 766 767 return 0; 768 } 769 770 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 771 struct intel_engine_cs *ring, 772 bool synchronous) 773 { 774 struct drm_device *dev = ppgtt->base.dev; 775 struct drm_i915_private *dev_priv = dev->dev_private; 776 int ret; 777 778 /* If we're in reset, we can assume the GPU is sufficiently idle to 779 * manually frob these bits. Ideally we could use the ring functions, 780 * except our error handling makes it quite difficult (can't use 781 * intel_ring_begin, ring->flush, or intel_ring_advance) 782 * 783 * FIXME: We should try not to special case reset 784 */ 785 if (synchronous || 786 i915_reset_in_progress(&dev_priv->gpu_error)) { 787 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); 788 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 789 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 790 POSTING_READ(RING_PP_DIR_BASE(ring)); 791 return 0; 792 } 793 794 /* NB: TLBs must be flushed and invalidated before a switch */ 795 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 796 if (ret) 797 return ret; 798 799 ret = intel_ring_begin(ring, 6); 800 if (ret) 801 return ret; 802 803 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 804 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 805 intel_ring_emit(ring, PP_DIR_DCLV_2G); 806 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 807 intel_ring_emit(ring, get_pd_offset(ppgtt)); 808 intel_ring_emit(ring, MI_NOOP); 809 intel_ring_advance(ring); 810 811 /* XXX: RCS is the only one to auto invalidate the TLBs? */ 812 if (ring->id != RCS) { 813 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 814 if (ret) 815 return ret; 816 } 817 818 return 0; 819 } 820 821 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 822 struct intel_engine_cs *ring, 823 bool synchronous) 824 { 825 struct drm_device *dev = ppgtt->base.dev; 826 struct drm_i915_private *dev_priv = dev->dev_private; 827 828 if (!synchronous) 829 return 0; 830 831 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 832 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 833 834 POSTING_READ(RING_PP_DIR_DCLV(ring)); 835 836 return 0; 837 } 838 839 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 840 { 841 struct drm_device *dev = ppgtt->base.dev; 842 struct drm_i915_private *dev_priv = dev->dev_private; 843 struct intel_engine_cs *ring; 844 int j, ret; 845 846 for_each_ring(ring, dev_priv, j) { 847 I915_WRITE(RING_MODE_GEN7(ring), 848 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 849 850 /* We promise to do a switch later with FULL PPGTT. If this is 851 * aliasing, this is the one and only switch we'll do */ 852 if (USES_FULL_PPGTT(dev)) 853 continue; 854 855 ret = ppgtt->switch_mm(ppgtt, ring, true); 856 if (ret) 857 goto err_out; 858 } 859 860 return 0; 861 862 err_out: 863 for_each_ring(ring, dev_priv, j) 864 I915_WRITE(RING_MODE_GEN7(ring), 865 _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); 866 return ret; 867 } 868 869 static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 870 { 871 struct drm_device *dev = ppgtt->base.dev; 872 struct drm_i915_private *dev_priv = dev->dev_private; 873 struct intel_engine_cs *ring; 874 uint32_t ecochk, ecobits; 875 int i; 876 877 ecobits = I915_READ(GAC_ECO_BITS); 878 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 879 880 ecochk = I915_READ(GAM_ECOCHK); 881 if (IS_HASWELL(dev)) { 882 ecochk |= ECOCHK_PPGTT_WB_HSW; 883 } else { 884 ecochk |= ECOCHK_PPGTT_LLC_IVB; 885 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 886 } 887 I915_WRITE(GAM_ECOCHK, ecochk); 888 889 for_each_ring(ring, dev_priv, i) { 890 int ret; 891 /* GFX_MODE is per-ring on gen7+ */ 892 I915_WRITE(RING_MODE_GEN7(ring), 893 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 894 895 /* We promise to do a switch later with FULL PPGTT. If this is 896 * aliasing, this is the one and only switch we'll do */ 897 if (USES_FULL_PPGTT(dev)) 898 continue; 899 900 ret = ppgtt->switch_mm(ppgtt, ring, true); 901 if (ret) 902 return ret; 903 } 904 905 return 0; 906 } 907 908 static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 909 { 910 struct drm_device *dev = ppgtt->base.dev; 911 struct drm_i915_private *dev_priv = dev->dev_private; 912 struct intel_engine_cs *ring; 913 uint32_t ecochk, gab_ctl, ecobits; 914 int i; 915 916 ecobits = I915_READ(GAC_ECO_BITS); 917 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 918 ECOBITS_PPGTT_CACHE64B); 919 920 gab_ctl = I915_READ(GAB_CTL); 921 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 922 923 ecochk = I915_READ(GAM_ECOCHK); 924 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 925 926 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 927 928 for_each_ring(ring, dev_priv, i) { 929 int ret = ppgtt->switch_mm(ppgtt, ring, true); 930 if (ret) 931 return ret; 932 } 933 934 return 0; 935 } 936 937 /* PPGTT support for Sandybdrige/Gen6 and later */ 938 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 939 uint64_t start, 940 uint64_t length, 941 bool use_scratch) 942 { 943 struct i915_hw_ppgtt *ppgtt = 944 container_of(vm, struct i915_hw_ppgtt, base); 945 gen6_gtt_pte_t *pt_vaddr, scratch_pte; 946 unsigned first_entry = start >> PAGE_SHIFT; 947 unsigned num_entries = length >> PAGE_SHIFT; 948 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 949 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; 950 unsigned last_pte, i; 951 952 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0); 953 954 while (num_entries) { 955 last_pte = first_pte + num_entries; 956 if (last_pte > I915_PPGTT_PT_ENTRIES) 957 last_pte = I915_PPGTT_PT_ENTRIES; 958 959 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 960 961 for (i = first_pte; i < last_pte; i++) 962 pt_vaddr[i] = scratch_pte; 963 964 kunmap_atomic(pt_vaddr); 965 966 num_entries -= last_pte - first_pte; 967 first_pte = 0; 968 act_pt++; 969 } 970 } 971 972 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 973 struct sg_table *pages, 974 uint64_t start, 975 enum i915_cache_level cache_level, u32 flags) 976 { 977 struct i915_hw_ppgtt *ppgtt = 978 container_of(vm, struct i915_hw_ppgtt, base); 979 gen6_gtt_pte_t *pt_vaddr; 980 unsigned first_entry = start >> PAGE_SHIFT; 981 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 982 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; 983 struct sg_page_iter sg_iter; 984 985 pt_vaddr = NULL; 986 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 987 if (pt_vaddr == NULL) 988 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 989 990 pt_vaddr[act_pte] = 991 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 992 cache_level, true, flags); 993 994 if (++act_pte == I915_PPGTT_PT_ENTRIES) { 995 kunmap_atomic(pt_vaddr); 996 pt_vaddr = NULL; 997 act_pt++; 998 act_pte = 0; 999 } 1000 } 1001 if (pt_vaddr) 1002 kunmap_atomic(pt_vaddr); 1003 } 1004 1005 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 1006 { 1007 int i; 1008 1009 if (ppgtt->pt_dma_addr) { 1010 for (i = 0; i < ppgtt->num_pd_entries; i++) 1011 pci_unmap_page(ppgtt->base.dev->pdev, 1012 ppgtt->pt_dma_addr[i], 1013 4096, PCI_DMA_BIDIRECTIONAL); 1014 } 1015 } 1016 1017 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) 1018 { 1019 int i; 1020 1021 kfree(ppgtt->pt_dma_addr); 1022 for (i = 0; i < ppgtt->num_pd_entries; i++) 1023 __free_page(ppgtt->pt_pages[i]); 1024 kfree(ppgtt->pt_pages); 1025 } 1026 1027 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1028 { 1029 struct i915_hw_ppgtt *ppgtt = 1030 container_of(vm, struct i915_hw_ppgtt, base); 1031 1032 list_del(&vm->global_link); 1033 drm_mm_takedown(&ppgtt->base.mm); 1034 drm_mm_remove_node(&ppgtt->node); 1035 1036 gen6_ppgtt_unmap_pages(ppgtt); 1037 gen6_ppgtt_free(ppgtt); 1038 } 1039 1040 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1041 { 1042 struct drm_device *dev = ppgtt->base.dev; 1043 struct drm_i915_private *dev_priv = dev->dev_private; 1044 bool retried = false; 1045 int ret; 1046 1047 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1048 * allocator works in address space sizes, so it's multiplied by page 1049 * size. We allocate at the top of the GTT to avoid fragmentation. 1050 */ 1051 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); 1052 alloc: 1053 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, 1054 &ppgtt->node, GEN6_PD_SIZE, 1055 GEN6_PD_ALIGN, 0, 1056 0, dev_priv->gtt.base.total, 1057 DRM_MM_TOPDOWN); 1058 if (ret == -ENOSPC && !retried) { 1059 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 1060 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1061 I915_CACHE_NONE, 1062 0, dev_priv->gtt.base.total, 1063 0); 1064 if (ret) 1065 return ret; 1066 1067 retried = true; 1068 goto alloc; 1069 } 1070 1071 if (ppgtt->node.start < dev_priv->gtt.mappable_end) 1072 DRM_DEBUG("Forced to use aperture for PDEs\n"); 1073 1074 ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; 1075 return ret; 1076 } 1077 1078 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) 1079 { 1080 int i; 1081 1082 ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *), 1083 GFP_KERNEL); 1084 1085 if (!ppgtt->pt_pages) 1086 return -ENOMEM; 1087 1088 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1089 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); 1090 if (!ppgtt->pt_pages[i]) { 1091 gen6_ppgtt_free(ppgtt); 1092 return -ENOMEM; 1093 } 1094 } 1095 1096 return 0; 1097 } 1098 1099 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 1100 { 1101 int ret; 1102 1103 ret = gen6_ppgtt_allocate_page_directories(ppgtt); 1104 if (ret) 1105 return ret; 1106 1107 ret = gen6_ppgtt_allocate_page_tables(ppgtt); 1108 if (ret) { 1109 drm_mm_remove_node(&ppgtt->node); 1110 return ret; 1111 } 1112 1113 ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t), 1114 GFP_KERNEL); 1115 if (!ppgtt->pt_dma_addr) { 1116 drm_mm_remove_node(&ppgtt->node); 1117 gen6_ppgtt_free(ppgtt); 1118 return -ENOMEM; 1119 } 1120 1121 return 0; 1122 } 1123 1124 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) 1125 { 1126 struct drm_device *dev = ppgtt->base.dev; 1127 int i; 1128 1129 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1130 dma_addr_t pt_addr; 1131 1132 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096, 1133 PCI_DMA_BIDIRECTIONAL); 1134 1135 if (pci_dma_mapping_error(dev->pdev, pt_addr)) { 1136 gen6_ppgtt_unmap_pages(ppgtt); 1137 return -EIO; 1138 } 1139 1140 ppgtt->pt_dma_addr[i] = pt_addr; 1141 } 1142 1143 return 0; 1144 } 1145 1146 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1147 { 1148 struct drm_device *dev = ppgtt->base.dev; 1149 struct drm_i915_private *dev_priv = dev->dev_private; 1150 int ret; 1151 1152 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; 1153 if (IS_GEN6(dev)) { 1154 ppgtt->enable = gen6_ppgtt_enable; 1155 ppgtt->switch_mm = gen6_mm_switch; 1156 } else if (IS_HASWELL(dev)) { 1157 ppgtt->enable = gen7_ppgtt_enable; 1158 ppgtt->switch_mm = hsw_mm_switch; 1159 } else if (IS_GEN7(dev)) { 1160 ppgtt->enable = gen7_ppgtt_enable; 1161 ppgtt->switch_mm = gen7_mm_switch; 1162 } else 1163 BUG(); 1164 1165 ret = gen6_ppgtt_alloc(ppgtt); 1166 if (ret) 1167 return ret; 1168 1169 ret = gen6_ppgtt_setup_page_tables(ppgtt); 1170 if (ret) { 1171 gen6_ppgtt_free(ppgtt); 1172 return ret; 1173 } 1174 1175 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 1176 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 1177 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 1178 ppgtt->base.start = 0; 1179 ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; 1180 ppgtt->debug_dump = gen6_dump_ppgtt; 1181 1182 ppgtt->pd_offset = 1183 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); 1184 1185 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 1186 1187 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", 1188 ppgtt->node.size >> 20, 1189 ppgtt->node.start / PAGE_SIZE); 1190 1191 return 0; 1192 } 1193 1194 int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 1195 { 1196 struct drm_i915_private *dev_priv = dev->dev_private; 1197 int ret = 0; 1198 1199 ppgtt->base.dev = dev; 1200 ppgtt->base.scratch = dev_priv->gtt.base.scratch; 1201 1202 if (INTEL_INFO(dev)->gen < 8) 1203 ret = gen6_ppgtt_init(ppgtt); 1204 else if (IS_GEN8(dev)) 1205 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); 1206 else 1207 BUG(); 1208 1209 if (!ret) { 1210 struct drm_i915_private *dev_priv = dev->dev_private; 1211 kref_init(&ppgtt->ref); 1212 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, 1213 ppgtt->base.total); 1214 i915_init_vm(dev_priv, &ppgtt->base); 1215 if (INTEL_INFO(dev)->gen < 8) { 1216 gen6_write_pdes(ppgtt); 1217 DRM_DEBUG("Adding PPGTT at offset %x\n", 1218 ppgtt->pd_offset << 10); 1219 } 1220 } 1221 1222 return ret; 1223 } 1224 1225 static void 1226 ppgtt_bind_vma(struct i915_vma *vma, 1227 enum i915_cache_level cache_level, 1228 u32 flags) 1229 { 1230 /* Currently applicable only to VLV */ 1231 if (vma->obj->gt_ro) 1232 flags |= PTE_READ_ONLY; 1233 1234 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 1235 cache_level, flags); 1236 } 1237 1238 static void ppgtt_unbind_vma(struct i915_vma *vma) 1239 { 1240 vma->vm->clear_range(vma->vm, 1241 vma->node.start, 1242 vma->obj->base.size, 1243 true); 1244 } 1245 1246 extern int intel_iommu_gfx_mapped; 1247 /* Certain Gen5 chipsets require require idling the GPU before 1248 * unmapping anything from the GTT when VT-d is enabled. 1249 */ 1250 static inline bool needs_idle_maps(struct drm_device *dev) 1251 { 1252 #ifdef CONFIG_INTEL_IOMMU 1253 /* Query intel_iommu to see if we need the workaround. Presumably that 1254 * was loaded first. 1255 */ 1256 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 1257 return true; 1258 #endif 1259 return false; 1260 } 1261 1262 static bool do_idling(struct drm_i915_private *dev_priv) 1263 { 1264 bool ret = dev_priv->mm.interruptible; 1265 1266 if (unlikely(dev_priv->gtt.do_idle_maps)) { 1267 dev_priv->mm.interruptible = false; 1268 if (i915_gpu_idle(dev_priv->dev)) { 1269 DRM_ERROR("Couldn't idle GPU\n"); 1270 /* Wait a bit, in hopes it avoids the hang */ 1271 udelay(10); 1272 } 1273 } 1274 1275 return ret; 1276 } 1277 1278 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 1279 { 1280 if (unlikely(dev_priv->gtt.do_idle_maps)) 1281 dev_priv->mm.interruptible = interruptible; 1282 } 1283 1284 void i915_check_and_clear_faults(struct drm_device *dev) 1285 { 1286 struct drm_i915_private *dev_priv = dev->dev_private; 1287 struct intel_engine_cs *ring; 1288 int i; 1289 1290 if (INTEL_INFO(dev)->gen < 6) 1291 return; 1292 1293 for_each_ring(ring, dev_priv, i) { 1294 u32 fault_reg; 1295 fault_reg = I915_READ(RING_FAULT_REG(ring)); 1296 if (fault_reg & RING_FAULT_VALID) { 1297 DRM_DEBUG_DRIVER("Unexpected fault\n" 1298 "\tAddr: 0x%08lx\\n" 1299 "\tAddress space: %s\n" 1300 "\tSource ID: %d\n" 1301 "\tType: %d\n", 1302 fault_reg & PAGE_MASK, 1303 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 1304 RING_FAULT_SRCID(fault_reg), 1305 RING_FAULT_FAULT_TYPE(fault_reg)); 1306 I915_WRITE(RING_FAULT_REG(ring), 1307 fault_reg & ~RING_FAULT_VALID); 1308 } 1309 } 1310 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); 1311 } 1312 1313 static void i915_ggtt_flush(struct drm_i915_private *dev_priv) 1314 { 1315 if (INTEL_INFO(dev_priv->dev)->gen < 6) { 1316 intel_gtt_chipset_flush(); 1317 } else { 1318 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1319 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1320 } 1321 } 1322 1323 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 1324 { 1325 struct drm_i915_private *dev_priv = dev->dev_private; 1326 1327 /* Don't bother messing with faults pre GEN6 as we have little 1328 * documentation supporting that it's a good idea. 1329 */ 1330 if (INTEL_INFO(dev)->gen < 6) 1331 return; 1332 1333 i915_check_and_clear_faults(dev); 1334 1335 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1336 dev_priv->gtt.base.start, 1337 dev_priv->gtt.base.total, 1338 true); 1339 1340 i915_ggtt_flush(dev_priv); 1341 } 1342 1343 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 1344 { 1345 struct drm_i915_private *dev_priv = dev->dev_private; 1346 struct drm_i915_gem_object *obj; 1347 struct i915_address_space *vm; 1348 1349 i915_check_and_clear_faults(dev); 1350 1351 /* First fill our portion of the GTT with scratch pages */ 1352 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1353 dev_priv->gtt.base.start, 1354 dev_priv->gtt.base.total, 1355 true); 1356 1357 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1358 struct i915_vma *vma = i915_gem_obj_to_vma(obj, 1359 &dev_priv->gtt.base); 1360 if (!vma) 1361 continue; 1362 1363 i915_gem_clflush_object(obj, obj->pin_display); 1364 /* The bind_vma code tries to be smart about tracking mappings. 1365 * Unfortunately above, we've just wiped out the mappings 1366 * without telling our object about it. So we need to fake it. 1367 */ 1368 obj->has_global_gtt_mapping = 0; 1369 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 1370 } 1371 1372 1373 if (INTEL_INFO(dev)->gen >= 8) { 1374 if (IS_CHERRYVIEW(dev)) 1375 chv_setup_private_ppat(dev_priv); 1376 else 1377 bdw_setup_private_ppat(dev_priv); 1378 1379 return; 1380 } 1381 1382 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 1383 /* TODO: Perhaps it shouldn't be gen6 specific */ 1384 if (i915_is_ggtt(vm)) { 1385 if (dev_priv->mm.aliasing_ppgtt) 1386 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); 1387 continue; 1388 } 1389 1390 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); 1391 } 1392 1393 i915_ggtt_flush(dev_priv); 1394 } 1395 1396 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 1397 { 1398 if (obj->has_dma_mapping) 1399 return 0; 1400 1401 if (!dma_map_sg(&obj->base.dev->pdev->dev, 1402 obj->pages->sgl, obj->pages->nents, 1403 PCI_DMA_BIDIRECTIONAL)) 1404 return -ENOSPC; 1405 1406 return 0; 1407 } 1408 1409 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) 1410 { 1411 #ifdef writeq 1412 writeq(pte, addr); 1413 #else 1414 iowrite32((u32)pte, addr); 1415 iowrite32(pte >> 32, addr + 4); 1416 #endif 1417 } 1418 1419 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 1420 struct sg_table *st, 1421 uint64_t start, 1422 enum i915_cache_level level, u32 unused) 1423 { 1424 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1425 unsigned first_entry = start >> PAGE_SHIFT; 1426 gen8_gtt_pte_t __iomem *gtt_entries = 1427 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1428 int i = 0; 1429 struct sg_page_iter sg_iter; 1430 dma_addr_t addr = 0; /* shut up gcc */ 1431 1432 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1433 addr = sg_dma_address(sg_iter.sg) + 1434 (sg_iter.sg_pgoffset << PAGE_SHIFT); 1435 gen8_set_pte(>t_entries[i], 1436 gen8_pte_encode(addr, level, true)); 1437 i++; 1438 } 1439 1440 /* 1441 * XXX: This serves as a posting read to make sure that the PTE has 1442 * actually been updated. There is some concern that even though 1443 * registers and PTEs are within the same BAR that they are potentially 1444 * of NUMA access patterns. Therefore, even with the way we assume 1445 * hardware should work, we must keep this posting read for paranoia. 1446 */ 1447 if (i != 0) 1448 WARN_ON(readq(>t_entries[i-1]) 1449 != gen8_pte_encode(addr, level, true)); 1450 1451 /* This next bit makes the above posting read even more important. We 1452 * want to flush the TLBs only after we're certain all the PTE updates 1453 * have finished. 1454 */ 1455 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1456 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1457 } 1458 1459 /* 1460 * Binds an object into the global gtt with the specified cache level. The object 1461 * will be accessible to the GPU via commands whose operands reference offsets 1462 * within the global GTT as well as accessible by the GPU through the GMADR 1463 * mapped BAR (dev_priv->mm.gtt->gtt). 1464 */ 1465 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 1466 struct sg_table *st, 1467 uint64_t start, 1468 enum i915_cache_level level, u32 flags) 1469 { 1470 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1471 unsigned first_entry = start >> PAGE_SHIFT; 1472 gen6_gtt_pte_t __iomem *gtt_entries = 1473 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1474 int i = 0; 1475 struct sg_page_iter sg_iter; 1476 dma_addr_t addr = 0; 1477 1478 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1479 addr = sg_page_iter_dma_address(&sg_iter); 1480 iowrite32(vm->pte_encode(addr, level, true, flags), >t_entries[i]); 1481 i++; 1482 } 1483 1484 /* XXX: This serves as a posting read to make sure that the PTE has 1485 * actually been updated. There is some concern that even though 1486 * registers and PTEs are within the same BAR that they are potentially 1487 * of NUMA access patterns. Therefore, even with the way we assume 1488 * hardware should work, we must keep this posting read for paranoia. 1489 */ 1490 if (i != 0) { 1491 unsigned long gtt = readl(>t_entries[i-1]); 1492 WARN_ON(gtt != vm->pte_encode(addr, level, true, flags)); 1493 } 1494 1495 /* This next bit makes the above posting read even more important. We 1496 * want to flush the TLBs only after we're certain all the PTE updates 1497 * have finished. 1498 */ 1499 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1500 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1501 } 1502 1503 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 1504 uint64_t start, 1505 uint64_t length, 1506 bool use_scratch) 1507 { 1508 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1509 unsigned first_entry = start >> PAGE_SHIFT; 1510 unsigned num_entries = length >> PAGE_SHIFT; 1511 gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = 1512 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1513 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1514 int i; 1515 1516 if (WARN(num_entries > max_entries, 1517 "First entry = %d; Num entries = %d (max=%d)\n", 1518 first_entry, num_entries, max_entries)) 1519 num_entries = max_entries; 1520 1521 scratch_pte = gen8_pte_encode(vm->scratch.addr, 1522 I915_CACHE_LLC, 1523 use_scratch); 1524 for (i = 0; i < num_entries; i++) 1525 gen8_set_pte(>t_base[i], scratch_pte); 1526 readl(gtt_base); 1527 } 1528 1529 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 1530 uint64_t start, 1531 uint64_t length, 1532 bool use_scratch) 1533 { 1534 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1535 unsigned first_entry = start >> PAGE_SHIFT; 1536 unsigned num_entries = length >> PAGE_SHIFT; 1537 gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = 1538 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1539 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1540 int i; 1541 1542 if (WARN(num_entries > max_entries, 1543 "First entry = %d; Num entries = %d (max=%d)\n", 1544 first_entry, num_entries, max_entries)) 1545 num_entries = max_entries; 1546 1547 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0); 1548 1549 for (i = 0; i < num_entries; i++) 1550 iowrite32(scratch_pte, >t_base[i]); 1551 readl(gtt_base); 1552 } 1553 1554 1555 static void i915_ggtt_bind_vma(struct i915_vma *vma, 1556 enum i915_cache_level cache_level, 1557 u32 unused) 1558 { 1559 const unsigned long entry = vma->node.start >> PAGE_SHIFT; 1560 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 1561 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 1562 1563 BUG_ON(!i915_is_ggtt(vma->vm)); 1564 intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags); 1565 vma->obj->has_global_gtt_mapping = 1; 1566 } 1567 1568 static void i915_ggtt_clear_range(struct i915_address_space *vm, 1569 uint64_t start, 1570 uint64_t length, 1571 bool unused) 1572 { 1573 unsigned first_entry = start >> PAGE_SHIFT; 1574 unsigned num_entries = length >> PAGE_SHIFT; 1575 intel_gtt_clear_range(first_entry, num_entries); 1576 } 1577 1578 static void i915_ggtt_unbind_vma(struct i915_vma *vma) 1579 { 1580 const unsigned int first = vma->node.start >> PAGE_SHIFT; 1581 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT; 1582 1583 BUG_ON(!i915_is_ggtt(vma->vm)); 1584 vma->obj->has_global_gtt_mapping = 0; 1585 intel_gtt_clear_range(first, size); 1586 } 1587 1588 static void ggtt_bind_vma(struct i915_vma *vma, 1589 enum i915_cache_level cache_level, 1590 u32 flags) 1591 { 1592 struct drm_device *dev = vma->vm->dev; 1593 struct drm_i915_private *dev_priv = dev->dev_private; 1594 struct drm_i915_gem_object *obj = vma->obj; 1595 1596 /* Currently applicable only to VLV */ 1597 if (obj->gt_ro) 1598 flags |= PTE_READ_ONLY; 1599 1600 /* If there is no aliasing PPGTT, or the caller needs a global mapping, 1601 * or we have a global mapping already but the cacheability flags have 1602 * changed, set the global PTEs. 1603 * 1604 * If there is an aliasing PPGTT it is anecdotally faster, so use that 1605 * instead if none of the above hold true. 1606 * 1607 * NB: A global mapping should only be needed for special regions like 1608 * "gtt mappable", SNB errata, or if specified via special execbuf 1609 * flags. At all other times, the GPU will use the aliasing PPGTT. 1610 */ 1611 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { 1612 if (!obj->has_global_gtt_mapping || 1613 (cache_level != obj->cache_level)) { 1614 vma->vm->insert_entries(vma->vm, obj->pages, 1615 vma->node.start, 1616 cache_level, flags); 1617 obj->has_global_gtt_mapping = 1; 1618 } 1619 } 1620 1621 if (dev_priv->mm.aliasing_ppgtt && 1622 (!obj->has_aliasing_ppgtt_mapping || 1623 (cache_level != obj->cache_level))) { 1624 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1625 appgtt->base.insert_entries(&appgtt->base, 1626 vma->obj->pages, 1627 vma->node.start, 1628 cache_level, flags); 1629 vma->obj->has_aliasing_ppgtt_mapping = 1; 1630 } 1631 } 1632 1633 static void ggtt_unbind_vma(struct i915_vma *vma) 1634 { 1635 struct drm_device *dev = vma->vm->dev; 1636 struct drm_i915_private *dev_priv = dev->dev_private; 1637 struct drm_i915_gem_object *obj = vma->obj; 1638 1639 if (obj->has_global_gtt_mapping) { 1640 vma->vm->clear_range(vma->vm, 1641 vma->node.start, 1642 obj->base.size, 1643 true); 1644 obj->has_global_gtt_mapping = 0; 1645 } 1646 1647 if (obj->has_aliasing_ppgtt_mapping) { 1648 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1649 appgtt->base.clear_range(&appgtt->base, 1650 vma->node.start, 1651 obj->base.size, 1652 true); 1653 obj->has_aliasing_ppgtt_mapping = 0; 1654 } 1655 } 1656 1657 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 1658 { 1659 struct drm_device *dev = obj->base.dev; 1660 struct drm_i915_private *dev_priv = dev->dev_private; 1661 bool interruptible; 1662 1663 interruptible = do_idling(dev_priv); 1664 1665 if (!obj->has_dma_mapping) 1666 dma_unmap_sg(&dev->pdev->dev, 1667 obj->pages->sgl, obj->pages->nents, 1668 PCI_DMA_BIDIRECTIONAL); 1669 1670 undo_idling(dev_priv, interruptible); 1671 } 1672 1673 static void i915_gtt_color_adjust(struct drm_mm_node *node, 1674 unsigned long color, 1675 unsigned long *start, 1676 unsigned long *end) 1677 { 1678 if (node->color != color) 1679 *start += 4096; 1680 1681 if (!list_empty(&node->node_list)) { 1682 node = list_entry(node->node_list.next, 1683 struct drm_mm_node, 1684 node_list); 1685 if (node->allocated && node->color != color) 1686 *end -= 4096; 1687 } 1688 } 1689 1690 void i915_gem_setup_global_gtt(struct drm_device *dev, 1691 unsigned long start, 1692 unsigned long mappable_end, 1693 unsigned long end) 1694 { 1695 /* Let GEM Manage all of the aperture. 1696 * 1697 * However, leave one page at the end still bound to the scratch page. 1698 * There are a number of places where the hardware apparently prefetches 1699 * past the end of the object, and we've seen multiple hangs with the 1700 * GPU head pointer stuck in a batchbuffer bound at the last page of the 1701 * aperture. One page should be enough to keep any prefetching inside 1702 * of the aperture. 1703 */ 1704 struct drm_i915_private *dev_priv = dev->dev_private; 1705 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 1706 struct drm_mm_node *entry; 1707 struct drm_i915_gem_object *obj; 1708 unsigned long hole_start, hole_end; 1709 1710 BUG_ON(mappable_end > end); 1711 1712 /* Subtract the guard page ... */ 1713 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE); 1714 if (!HAS_LLC(dev)) 1715 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust; 1716 1717 /* Mark any preallocated objects as occupied */ 1718 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1719 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 1720 int ret; 1721 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", 1722 i915_gem_obj_ggtt_offset(obj), obj->base.size); 1723 1724 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 1725 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); 1726 if (ret) 1727 DRM_DEBUG_KMS("Reservation failed\n"); 1728 obj->has_global_gtt_mapping = 1; 1729 } 1730 1731 dev_priv->gtt.base.start = start; 1732 dev_priv->gtt.base.total = end - start; 1733 1734 /* Clear any non-preallocated blocks */ 1735 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { 1736 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 1737 hole_start, hole_end); 1738 ggtt_vm->clear_range(ggtt_vm, hole_start, 1739 hole_end - hole_start, true); 1740 } 1741 1742 /* And finally clear the reserved guard page */ 1743 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); 1744 } 1745 1746 void i915_gem_init_global_gtt(struct drm_device *dev) 1747 { 1748 struct drm_i915_private *dev_priv = dev->dev_private; 1749 unsigned long gtt_size, mappable_size; 1750 1751 gtt_size = dev_priv->gtt.base.total; 1752 mappable_size = dev_priv->gtt.mappable_end; 1753 1754 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); 1755 } 1756 1757 static int setup_scratch_page(struct drm_device *dev) 1758 { 1759 struct drm_i915_private *dev_priv = dev->dev_private; 1760 struct page *page; 1761 dma_addr_t dma_addr; 1762 1763 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); 1764 if (page == NULL) 1765 return -ENOMEM; 1766 get_page(page); 1767 set_pages_uc(page, 1); 1768 1769 #ifdef CONFIG_INTEL_IOMMU 1770 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, 1771 PCI_DMA_BIDIRECTIONAL); 1772 if (pci_dma_mapping_error(dev->pdev, dma_addr)) 1773 return -EINVAL; 1774 #else 1775 dma_addr = page_to_phys(page); 1776 #endif 1777 dev_priv->gtt.base.scratch.page = page; 1778 dev_priv->gtt.base.scratch.addr = dma_addr; 1779 1780 return 0; 1781 } 1782 1783 static void teardown_scratch_page(struct drm_device *dev) 1784 { 1785 struct drm_i915_private *dev_priv = dev->dev_private; 1786 struct page *page = dev_priv->gtt.base.scratch.page; 1787 1788 set_pages_wb(page, 1); 1789 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr, 1790 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 1791 put_page(page); 1792 __free_page(page); 1793 } 1794 1795 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 1796 { 1797 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 1798 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 1799 return snb_gmch_ctl << 20; 1800 } 1801 1802 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 1803 { 1804 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 1805 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 1806 if (bdw_gmch_ctl) 1807 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 1808 1809 #ifdef CONFIG_X86_32 1810 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 1811 if (bdw_gmch_ctl > 4) 1812 bdw_gmch_ctl = 4; 1813 #endif 1814 1815 return bdw_gmch_ctl << 20; 1816 } 1817 1818 static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 1819 { 1820 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 1821 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 1822 1823 if (gmch_ctrl) 1824 return 1 << (20 + gmch_ctrl); 1825 1826 return 0; 1827 } 1828 1829 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 1830 { 1831 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 1832 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 1833 return snb_gmch_ctl << 25; /* 32 MB units */ 1834 } 1835 1836 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 1837 { 1838 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 1839 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 1840 return bdw_gmch_ctl << 25; /* 32 MB units */ 1841 } 1842 1843 static size_t chv_get_stolen_size(u16 gmch_ctrl) 1844 { 1845 gmch_ctrl >>= SNB_GMCH_GMS_SHIFT; 1846 gmch_ctrl &= SNB_GMCH_GMS_MASK; 1847 1848 /* 1849 * 0x0 to 0x10: 32MB increments starting at 0MB 1850 * 0x11 to 0x16: 4MB increments starting at 8MB 1851 * 0x17 to 0x1d: 4MB increments start at 36MB 1852 */ 1853 if (gmch_ctrl < 0x11) 1854 return gmch_ctrl << 25; 1855 else if (gmch_ctrl < 0x17) 1856 return (gmch_ctrl - 0x11 + 2) << 22; 1857 else 1858 return (gmch_ctrl - 0x17 + 9) << 22; 1859 } 1860 1861 static int ggtt_probe_common(struct drm_device *dev, 1862 size_t gtt_size) 1863 { 1864 struct drm_i915_private *dev_priv = dev->dev_private; 1865 phys_addr_t gtt_phys_addr; 1866 int ret; 1867 1868 /* For Modern GENs the PTEs and register space are split in the BAR */ 1869 gtt_phys_addr = pci_resource_start(dev->pdev, 0) + 1870 (pci_resource_len(dev->pdev, 0) / 2); 1871 1872 dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size); 1873 if (!dev_priv->gtt.gsm) { 1874 DRM_ERROR("Failed to map the gtt page table\n"); 1875 return -ENOMEM; 1876 } 1877 1878 ret = setup_scratch_page(dev); 1879 if (ret) { 1880 DRM_ERROR("Scratch setup failed\n"); 1881 /* iounmap will also get called at remove, but meh */ 1882 iounmap(dev_priv->gtt.gsm); 1883 } 1884 1885 return ret; 1886 } 1887 1888 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 1889 * bits. When using advanced contexts each context stores its own PAT, but 1890 * writing this data shouldn't be harmful even in those cases. */ 1891 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) 1892 { 1893 uint64_t pat; 1894 1895 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 1896 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 1897 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 1898 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 1899 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 1900 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 1901 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 1902 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 1903 1904 /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b 1905 * write would work. */ 1906 I915_WRITE(GEN8_PRIVATE_PAT, pat); 1907 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 1908 } 1909 1910 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) 1911 { 1912 uint64_t pat; 1913 1914 /* 1915 * Map WB on BDW to snooped on CHV. 1916 * 1917 * Only the snoop bit has meaning for CHV, the rest is 1918 * ignored. 1919 * 1920 * Note that the harware enforces snooping for all page 1921 * table accesses. The snoop bit is actually ignored for 1922 * PDEs. 1923 */ 1924 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 1925 GEN8_PPAT(1, 0) | 1926 GEN8_PPAT(2, 0) | 1927 GEN8_PPAT(3, 0) | 1928 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 1929 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 1930 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 1931 GEN8_PPAT(7, CHV_PPAT_SNOOP); 1932 1933 I915_WRITE(GEN8_PRIVATE_PAT, pat); 1934 I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); 1935 } 1936 1937 static int gen8_gmch_probe(struct drm_device *dev, 1938 size_t *gtt_total, 1939 size_t *stolen, 1940 phys_addr_t *mappable_base, 1941 unsigned long *mappable_end) 1942 { 1943 struct drm_i915_private *dev_priv = dev->dev_private; 1944 unsigned int gtt_size; 1945 u16 snb_gmch_ctl; 1946 int ret; 1947 1948 /* TODO: We're not aware of mappable constraints on gen8 yet */ 1949 *mappable_base = pci_resource_start(dev->pdev, 2); 1950 *mappable_end = pci_resource_len(dev->pdev, 2); 1951 1952 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) 1953 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); 1954 1955 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 1956 1957 if (IS_CHERRYVIEW(dev)) { 1958 *stolen = chv_get_stolen_size(snb_gmch_ctl); 1959 gtt_size = chv_get_total_gtt_size(snb_gmch_ctl); 1960 } else { 1961 *stolen = gen8_get_stolen_size(snb_gmch_ctl); 1962 gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); 1963 } 1964 1965 *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT; 1966 1967 if (IS_CHERRYVIEW(dev)) 1968 chv_setup_private_ppat(dev_priv); 1969 else 1970 bdw_setup_private_ppat(dev_priv); 1971 1972 ret = ggtt_probe_common(dev, gtt_size); 1973 1974 dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; 1975 dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; 1976 1977 return ret; 1978 } 1979 1980 static int gen6_gmch_probe(struct drm_device *dev, 1981 size_t *gtt_total, 1982 size_t *stolen, 1983 phys_addr_t *mappable_base, 1984 unsigned long *mappable_end) 1985 { 1986 struct drm_i915_private *dev_priv = dev->dev_private; 1987 unsigned int gtt_size; 1988 u16 snb_gmch_ctl; 1989 int ret; 1990 1991 *mappable_base = pci_resource_start(dev->pdev, 2); 1992 *mappable_end = pci_resource_len(dev->pdev, 2); 1993 1994 /* 64/512MB is the current min/max we actually know of, but this is just 1995 * a coarse sanity check. 1996 */ 1997 if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) { 1998 DRM_ERROR("Unknown GMADR size (%lx)\n", 1999 dev_priv->gtt.mappable_end); 2000 return -ENXIO; 2001 } 2002 2003 if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) 2004 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); 2005 pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); 2006 2007 *stolen = gen6_get_stolen_size(snb_gmch_ctl); 2008 2009 gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); 2010 *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT; 2011 2012 ret = ggtt_probe_common(dev, gtt_size); 2013 2014 dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range; 2015 dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries; 2016 2017 return ret; 2018 } 2019 2020 static void gen6_gmch_remove(struct i915_address_space *vm) 2021 { 2022 2023 struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base); 2024 2025 if (drm_mm_initialized(&vm->mm)) { 2026 drm_mm_takedown(&vm->mm); 2027 list_del(&vm->global_link); 2028 } 2029 iounmap(gtt->gsm); 2030 teardown_scratch_page(vm->dev); 2031 } 2032 2033 static int i915_gmch_probe(struct drm_device *dev, 2034 size_t *gtt_total, 2035 size_t *stolen, 2036 phys_addr_t *mappable_base, 2037 unsigned long *mappable_end) 2038 { 2039 struct drm_i915_private *dev_priv = dev->dev_private; 2040 int ret; 2041 2042 ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL); 2043 if (!ret) { 2044 DRM_ERROR("failed to set up gmch\n"); 2045 return -EIO; 2046 } 2047 2048 intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end); 2049 2050 dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev); 2051 dev_priv->gtt.base.clear_range = i915_ggtt_clear_range; 2052 2053 if (unlikely(dev_priv->gtt.do_idle_maps)) 2054 DRM_INFO("applying Ironlake quirks for intel_iommu\n"); 2055 2056 return 0; 2057 } 2058 2059 static void i915_gmch_remove(struct i915_address_space *vm) 2060 { 2061 if (drm_mm_initialized(&vm->mm)) { 2062 drm_mm_takedown(&vm->mm); 2063 list_del(&vm->global_link); 2064 } 2065 intel_gmch_remove(); 2066 } 2067 2068 int i915_gem_gtt_init(struct drm_device *dev) 2069 { 2070 struct drm_i915_private *dev_priv = dev->dev_private; 2071 struct i915_gtt *gtt = &dev_priv->gtt; 2072 int ret; 2073 2074 if (INTEL_INFO(dev)->gen <= 5) { 2075 gtt->gtt_probe = i915_gmch_probe; 2076 gtt->base.cleanup = i915_gmch_remove; 2077 } else if (INTEL_INFO(dev)->gen < 8) { 2078 gtt->gtt_probe = gen6_gmch_probe; 2079 gtt->base.cleanup = gen6_gmch_remove; 2080 if (IS_HASWELL(dev) && dev_priv->ellc_size) 2081 gtt->base.pte_encode = iris_pte_encode; 2082 else if (IS_HASWELL(dev)) 2083 gtt->base.pte_encode = hsw_pte_encode; 2084 else if (IS_VALLEYVIEW(dev)) 2085 gtt->base.pte_encode = byt_pte_encode; 2086 else if (INTEL_INFO(dev)->gen >= 7) 2087 gtt->base.pte_encode = ivb_pte_encode; 2088 else 2089 gtt->base.pte_encode = snb_pte_encode; 2090 } else { 2091 dev_priv->gtt.gtt_probe = gen8_gmch_probe; 2092 dev_priv->gtt.base.cleanup = gen6_gmch_remove; 2093 } 2094 2095 ret = gtt->gtt_probe(dev, >t->base.total, >t->stolen_size, 2096 >t->mappable_base, >t->mappable_end); 2097 if (ret) 2098 return ret; 2099 2100 gtt->base.dev = dev; 2101 2102 /* GMADR is the PCI mmio aperture into the global GTT. */ 2103 DRM_INFO("Memory usable by graphics device = %zdM\n", 2104 gtt->base.total >> 20); 2105 DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20); 2106 DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20); 2107 #ifdef CONFIG_INTEL_IOMMU 2108 if (intel_iommu_gfx_mapped) 2109 DRM_INFO("VT-d active for gfx access\n"); 2110 #endif 2111 /* 2112 * i915.enable_ppgtt is read-only, so do an early pass to validate the 2113 * user's requested state against the hardware/driver capabilities. We 2114 * do this now so that we can print out any log messages once rather 2115 * than every time we check intel_enable_ppgtt(). 2116 */ 2117 i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt); 2118 DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); 2119 2120 return 0; 2121 } 2122 2123 static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj, 2124 struct i915_address_space *vm) 2125 { 2126 struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); 2127 if (vma == NULL) 2128 return ERR_PTR(-ENOMEM); 2129 2130 INIT_LIST_HEAD(&vma->vma_link); 2131 INIT_LIST_HEAD(&vma->mm_list); 2132 INIT_LIST_HEAD(&vma->exec_list); 2133 vma->vm = vm; 2134 vma->obj = obj; 2135 2136 switch (INTEL_INFO(vm->dev)->gen) { 2137 case 8: 2138 case 7: 2139 case 6: 2140 if (i915_is_ggtt(vm)) { 2141 vma->unbind_vma = ggtt_unbind_vma; 2142 vma->bind_vma = ggtt_bind_vma; 2143 } else { 2144 vma->unbind_vma = ppgtt_unbind_vma; 2145 vma->bind_vma = ppgtt_bind_vma; 2146 } 2147 break; 2148 case 5: 2149 case 4: 2150 case 3: 2151 case 2: 2152 BUG_ON(!i915_is_ggtt(vm)); 2153 vma->unbind_vma = i915_ggtt_unbind_vma; 2154 vma->bind_vma = i915_ggtt_bind_vma; 2155 break; 2156 default: 2157 BUG(); 2158 } 2159 2160 /* Keep GGTT vmas first to make debug easier */ 2161 if (i915_is_ggtt(vm)) 2162 list_add(&vma->vma_link, &obj->vma_list); 2163 else 2164 list_add_tail(&vma->vma_link, &obj->vma_list); 2165 2166 return vma; 2167 } 2168 2169 struct i915_vma * 2170 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, 2171 struct i915_address_space *vm) 2172 { 2173 struct i915_vma *vma; 2174 2175 vma = i915_gem_obj_to_vma(obj, vm); 2176 if (!vma) 2177 vma = __i915_gem_vma_create(obj, vm); 2178 2179 return vma; 2180 } 2181