1 /* 2 * Copyright © 2010 Daniel Vetter 3 * Copyright © 2011-2014 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 
23 * 24 */ 25 26 #include <linux/seq_file.h> 27 #include <drm/drmP.h> 28 #include <drm/i915_drm.h> 29 #include "i915_drv.h" 30 #include "i915_trace.h" 31 #include "intel_drv.h" 32 33 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv); 34 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv); 35 36 bool intel_enable_ppgtt(struct drm_device *dev, bool full) 37 { 38 if (i915.enable_ppgtt == 0) 39 return false; 40 41 if (i915.enable_ppgtt == 1 && full) 42 return false; 43 44 return true; 45 } 46 47 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) 48 { 49 if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) 50 return 0; 51 52 if (enable_ppgtt == 1) 53 return 1; 54 55 if (enable_ppgtt == 2 && HAS_PPGTT(dev)) 56 return 2; 57 58 #ifdef CONFIG_INTEL_IOMMU 59 /* Disable ppgtt on SNB if VT-d is on. */ 60 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) { 61 DRM_INFO("Disabling PPGTT because VT-d is on\n"); 62 return 0; 63 } 64 #endif 65 66 return HAS_ALIASING_PPGTT(dev) ? 1 : 0; 67 } 68 69 70 static void ppgtt_bind_vma(struct i915_vma *vma, 71 enum i915_cache_level cache_level, 72 u32 flags); 73 static void ppgtt_unbind_vma(struct i915_vma *vma); 74 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt); 75 76 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, 77 enum i915_cache_level level, 78 bool valid) 79 { 80 gen8_gtt_pte_t pte = valid ? 
_PAGE_PRESENT | _PAGE_RW : 0; 81 pte |= addr; 82 83 switch (level) { 84 case I915_CACHE_NONE: 85 pte |= PPAT_UNCACHED_INDEX; 86 break; 87 case I915_CACHE_WT: 88 pte |= PPAT_DISPLAY_ELLC_INDEX; 89 break; 90 default: 91 pte |= PPAT_CACHED_INDEX; 92 break; 93 } 94 95 return pte; 96 } 97 98 static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev, 99 dma_addr_t addr, 100 enum i915_cache_level level) 101 { 102 gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW; 103 pde |= addr; 104 if (level != I915_CACHE_NONE) 105 pde |= PPAT_CACHED_PDE_INDEX; 106 else 107 pde |= PPAT_UNCACHED_INDEX; 108 return pde; 109 } 110 111 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, 112 enum i915_cache_level level, 113 bool valid) 114 { 115 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 116 pte |= GEN6_PTE_ADDR_ENCODE(addr); 117 118 switch (level) { 119 case I915_CACHE_L3_LLC: 120 case I915_CACHE_LLC: 121 pte |= GEN6_PTE_CACHE_LLC; 122 break; 123 case I915_CACHE_NONE: 124 pte |= GEN6_PTE_UNCACHED; 125 break; 126 default: 127 WARN_ON(1); 128 } 129 130 return pte; 131 } 132 133 static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr, 134 enum i915_cache_level level, 135 bool valid) 136 { 137 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 138 pte |= GEN6_PTE_ADDR_ENCODE(addr); 139 140 switch (level) { 141 case I915_CACHE_L3_LLC: 142 pte |= GEN7_PTE_CACHE_L3_LLC; 143 break; 144 case I915_CACHE_LLC: 145 pte |= GEN6_PTE_CACHE_LLC; 146 break; 147 case I915_CACHE_NONE: 148 pte |= GEN6_PTE_UNCACHED; 149 break; 150 default: 151 WARN_ON(1); 152 } 153 154 return pte; 155 } 156 157 static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr, 158 enum i915_cache_level level, 159 bool valid) 160 { 161 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 162 pte |= GEN6_PTE_ADDR_ENCODE(addr); 163 164 /* Mark the page as writeable. Other platforms don't have a 165 * setting for read-only/writable, so this matches that behavior. 
166 */ 167 pte |= BYT_PTE_WRITEABLE; 168 169 if (level != I915_CACHE_NONE) 170 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; 171 172 return pte; 173 } 174 175 static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr, 176 enum i915_cache_level level, 177 bool valid) 178 { 179 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 180 pte |= HSW_PTE_ADDR_ENCODE(addr); 181 182 if (level != I915_CACHE_NONE) 183 pte |= HSW_WB_LLC_AGE3; 184 185 return pte; 186 } 187 188 static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, 189 enum i915_cache_level level, 190 bool valid) 191 { 192 gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0; 193 pte |= HSW_PTE_ADDR_ENCODE(addr); 194 195 switch (level) { 196 case I915_CACHE_NONE: 197 break; 198 case I915_CACHE_WT: 199 pte |= HSW_WT_ELLC_LLC_AGE3; 200 break; 201 default: 202 pte |= HSW_WB_ELLC_LLC_AGE3; 203 break; 204 } 205 206 return pte; 207 } 208 209 /* Broadwell Page Directory Pointer Descriptors */ 210 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, 211 uint64_t val, bool synchronous) 212 { 213 struct drm_i915_private *dev_priv = ring->dev->dev_private; 214 int ret; 215 216 BUG_ON(entry >= 4); 217 218 if (synchronous) { 219 I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32); 220 I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val); 221 return 0; 222 } 223 224 ret = intel_ring_begin(ring, 6); 225 if (ret) 226 return ret; 227 228 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 229 intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry)); 230 intel_ring_emit(ring, (u32)(val >> 32)); 231 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); 232 intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry)); 233 intel_ring_emit(ring, (u32)(val)); 234 intel_ring_advance(ring); 235 236 return 0; 237 } 238 239 static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, 240 struct intel_engine_cs *ring, 241 bool synchronous) 242 { 243 int i, ret; 244 245 /* bit of a hack to find the actual last used pd */ 246 int used_pd = ppgtt->num_pd_entries / 
GEN8_PDES_PER_PAGE; 247 248 for (i = used_pd - 1; i >= 0; i--) { 249 dma_addr_t addr = ppgtt->pd_dma_addr[i]; 250 ret = gen8_write_pdp(ring, i, addr, synchronous); 251 if (ret) 252 return ret; 253 } 254 255 return 0; 256 } 257 258 static void gen8_ppgtt_clear_range(struct i915_address_space *vm, 259 uint64_t start, 260 uint64_t length, 261 bool use_scratch) 262 { 263 struct i915_hw_ppgtt *ppgtt = 264 container_of(vm, struct i915_hw_ppgtt, base); 265 gen8_gtt_pte_t *pt_vaddr, scratch_pte; 266 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 267 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 268 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 269 unsigned num_entries = length >> PAGE_SHIFT; 270 unsigned last_pte, i; 271 272 scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, 273 I915_CACHE_LLC, use_scratch); 274 275 while (num_entries) { 276 struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde]; 277 278 last_pte = pte + num_entries; 279 if (last_pte > GEN8_PTES_PER_PAGE) 280 last_pte = GEN8_PTES_PER_PAGE; 281 282 pt_vaddr = kmap_atomic(page_table); 283 284 for (i = pte; i < last_pte; i++) { 285 pt_vaddr[i] = scratch_pte; 286 num_entries--; 287 } 288 289 if (!HAS_LLC(ppgtt->base.dev)) 290 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 291 kunmap_atomic(pt_vaddr); 292 293 pte = 0; 294 if (++pde == GEN8_PDES_PER_PAGE) { 295 pdpe++; 296 pde = 0; 297 } 298 } 299 } 300 301 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, 302 struct sg_table *pages, 303 uint64_t start, 304 enum i915_cache_level cache_level) 305 { 306 struct i915_hw_ppgtt *ppgtt = 307 container_of(vm, struct i915_hw_ppgtt, base); 308 gen8_gtt_pte_t *pt_vaddr; 309 unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; 310 unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; 311 unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; 312 struct sg_page_iter sg_iter; 313 314 pt_vaddr = NULL; 315 316 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 
0) { 317 if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS)) 318 break; 319 320 if (pt_vaddr == NULL) 321 pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]); 322 323 pt_vaddr[pte] = 324 gen8_pte_encode(sg_page_iter_dma_address(&sg_iter), 325 cache_level, true); 326 if (++pte == GEN8_PTES_PER_PAGE) { 327 if (!HAS_LLC(ppgtt->base.dev)) 328 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 329 kunmap_atomic(pt_vaddr); 330 pt_vaddr = NULL; 331 if (++pde == GEN8_PDES_PER_PAGE) { 332 pdpe++; 333 pde = 0; 334 } 335 pte = 0; 336 } 337 } 338 if (pt_vaddr) { 339 if (!HAS_LLC(ppgtt->base.dev)) 340 drm_clflush_virt_range(pt_vaddr, PAGE_SIZE); 341 kunmap_atomic(pt_vaddr); 342 } 343 } 344 345 static void gen8_free_page_tables(struct page **pt_pages) 346 { 347 int i; 348 349 if (pt_pages == NULL) 350 return; 351 352 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) 353 if (pt_pages[i]) 354 __free_pages(pt_pages[i], 0); 355 } 356 357 static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt) 358 { 359 int i; 360 361 for (i = 0; i < ppgtt->num_pd_pages; i++) { 362 gen8_free_page_tables(ppgtt->gen8_pt_pages[i]); 363 kfree(ppgtt->gen8_pt_pages[i]); 364 kfree(ppgtt->gen8_pt_dma_addr[i]); 365 } 366 367 __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT)); 368 } 369 370 static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 371 { 372 struct pci_dev *hwdev = ppgtt->base.dev->pdev; 373 int i, j; 374 375 for (i = 0; i < ppgtt->num_pd_pages; i++) { 376 /* TODO: In the future we'll support sparse mappings, so this 377 * will have to change. 
*/ 378 if (!ppgtt->pd_dma_addr[i]) 379 continue; 380 381 pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, 382 PCI_DMA_BIDIRECTIONAL); 383 384 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 385 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 386 if (addr) 387 pci_unmap_page(hwdev, addr, PAGE_SIZE, 388 PCI_DMA_BIDIRECTIONAL); 389 } 390 } 391 } 392 393 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 394 { 395 struct i915_hw_ppgtt *ppgtt = 396 container_of(vm, struct i915_hw_ppgtt, base); 397 398 list_del(&vm->global_link); 399 drm_mm_takedown(&vm->mm); 400 401 gen8_ppgtt_unmap_pages(ppgtt); 402 gen8_ppgtt_free(ppgtt); 403 } 404 405 static struct page **__gen8_alloc_page_tables(void) 406 { 407 struct page **pt_pages; 408 int i; 409 410 pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL); 411 if (!pt_pages) 412 return ERR_PTR(-ENOMEM); 413 414 for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { 415 pt_pages[i] = alloc_page(GFP_KERNEL); 416 if (!pt_pages[i]) 417 goto bail; 418 } 419 420 return pt_pages; 421 422 bail: 423 gen8_free_page_tables(pt_pages); 424 kfree(pt_pages); 425 return ERR_PTR(-ENOMEM); 426 } 427 428 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt, 429 const int max_pdp) 430 { 431 struct page **pt_pages[GEN8_LEGACY_PDPS]; 432 int i, ret; 433 434 for (i = 0; i < max_pdp; i++) { 435 pt_pages[i] = __gen8_alloc_page_tables(); 436 if (IS_ERR(pt_pages[i])) { 437 ret = PTR_ERR(pt_pages[i]); 438 goto unwind_out; 439 } 440 } 441 442 /* NB: Avoid touching gen8_pt_pages until last to keep the allocation, 443 * "atomic" - for cleanup purposes. 
444 */ 445 for (i = 0; i < max_pdp; i++) 446 ppgtt->gen8_pt_pages[i] = pt_pages[i]; 447 448 return 0; 449 450 unwind_out: 451 while (i--) { 452 gen8_free_page_tables(pt_pages[i]); 453 kfree(pt_pages[i]); 454 } 455 456 return ret; 457 } 458 459 static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt) 460 { 461 int i; 462 463 for (i = 0; i < ppgtt->num_pd_pages; i++) { 464 ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, 465 sizeof(dma_addr_t), 466 GFP_KERNEL); 467 if (!ppgtt->gen8_pt_dma_addr[i]) 468 return -ENOMEM; 469 } 470 471 return 0; 472 } 473 474 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, 475 const int max_pdp) 476 { 477 ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); 478 if (!ppgtt->pd_pages) 479 return -ENOMEM; 480 481 ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); 482 BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); 483 484 return 0; 485 } 486 487 static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, 488 const int max_pdp) 489 { 490 int ret; 491 492 ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp); 493 if (ret) 494 return ret; 495 496 ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp); 497 if (ret) { 498 __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); 499 return ret; 500 } 501 502 ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; 503 504 ret = gen8_ppgtt_allocate_dma(ppgtt); 505 if (ret) 506 gen8_ppgtt_free(ppgtt); 507 508 return ret; 509 } 510 511 static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, 512 const int pd) 513 { 514 dma_addr_t pd_addr; 515 int ret; 516 517 pd_addr = pci_map_page(ppgtt->base.dev->pdev, 518 &ppgtt->pd_pages[pd], 0, 519 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 520 521 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); 522 if (ret) 523 return ret; 524 525 ppgtt->pd_dma_addr[pd] = pd_addr; 526 527 return 0; 528 } 529 530 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt 
*ppgtt, 531 const int pd, 532 const int pt) 533 { 534 dma_addr_t pt_addr; 535 struct page *p; 536 int ret; 537 538 p = ppgtt->gen8_pt_pages[pd][pt]; 539 pt_addr = pci_map_page(ppgtt->base.dev->pdev, 540 p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 541 ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr); 542 if (ret) 543 return ret; 544 545 ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr; 546 547 return 0; 548 } 549 550 /** 551 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 552 * with a net effect resembling a 2-level page table in normal x86 terms. Each 553 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 554 * space. 555 * 556 * FIXME: split allocation into smaller pieces. For now we only ever do this 557 * once, but with full PPGTT, the multiple contiguous allocations will be bad. 558 * TODO: Do something with the size parameter 559 */ 560 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) 561 { 562 const int max_pdp = DIV_ROUND_UP(size, 1 << 30); 563 const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; 564 int i, j, ret; 565 566 if (size % (1<<30)) 567 DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); 568 569 /* 1. Do all our allocations for page directories and page tables. */ 570 ret = gen8_ppgtt_alloc(ppgtt, max_pdp); 571 if (ret) 572 return ret; 573 574 /* 575 * 2. Create DMA mappings for the page directories and page tables. 576 */ 577 for (i = 0; i < max_pdp; i++) { 578 ret = gen8_ppgtt_setup_page_directories(ppgtt, i); 579 if (ret) 580 goto bail; 581 582 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 583 ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j); 584 if (ret) 585 goto bail; 586 } 587 } 588 589 /* 590 * 3. Map all the page directory entires to point to the page tables 591 * we've allocated. 592 * 593 * For now, the PPGTT helper functions all require that the PDEs are 594 * plugged in correctly. So we do that now/here. 
For aliasing PPGTT, we 595 * will never need to touch the PDEs again. 596 */ 597 for (i = 0; i < max_pdp; i++) { 598 gen8_ppgtt_pde_t *pd_vaddr; 599 pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); 600 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { 601 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; 602 pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, 603 I915_CACHE_LLC); 604 } 605 if (!HAS_LLC(ppgtt->base.dev)) 606 drm_clflush_virt_range(pd_vaddr, PAGE_SIZE); 607 kunmap_atomic(pd_vaddr); 608 } 609 610 ppgtt->enable = gen8_ppgtt_enable; 611 ppgtt->switch_mm = gen8_mm_switch; 612 ppgtt->base.clear_range = gen8_ppgtt_clear_range; 613 ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; 614 ppgtt->base.cleanup = gen8_ppgtt_cleanup; 615 ppgtt->base.start = 0; 616 ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE; 617 618 ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 619 620 DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", 621 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); 622 DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", 623 ppgtt->num_pd_entries, 624 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30)); 625 return 0; 626 627 bail: 628 gen8_ppgtt_unmap_pages(ppgtt); 629 gen8_ppgtt_free(ppgtt); 630 return ret; 631 } 632 633 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) 634 { 635 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 636 struct i915_address_space *vm = &ppgtt->base; 637 gen6_gtt_pte_t __iomem *pd_addr; 638 gen6_gtt_pte_t scratch_pte; 639 uint32_t pd_entry; 640 int pte, pde; 641 642 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); 643 644 pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + 645 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 646 647 seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm, 648 ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries); 649 for (pde = 0; pde < 
ppgtt->num_pd_entries; pde++) { 650 u32 expected; 651 gen6_gtt_pte_t *pt_vaddr; 652 dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde]; 653 pd_entry = readl(pd_addr + pde); 654 expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); 655 656 if (pd_entry != expected) 657 seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n", 658 pde, 659 pd_entry, 660 expected); 661 seq_printf(m, "\tPDE: %x\n", pd_entry); 662 663 pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); 664 for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { 665 unsigned long va = 666 (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + 667 (pte * PAGE_SIZE); 668 int i; 669 bool found = false; 670 for (i = 0; i < 4; i++) 671 if (pt_vaddr[pte + i] != scratch_pte) 672 found = true; 673 if (!found) 674 continue; 675 676 seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte); 677 for (i = 0; i < 4; i++) { 678 if (pt_vaddr[pte + i] != scratch_pte) 679 seq_printf(m, " %08x", pt_vaddr[pte + i]); 680 else 681 seq_puts(m, " SCRATCH "); 682 } 683 seq_puts(m, "\n"); 684 } 685 kunmap_atomic(pt_vaddr); 686 } 687 } 688 689 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) 690 { 691 struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; 692 gen6_gtt_pte_t __iomem *pd_addr; 693 uint32_t pd_entry; 694 int i; 695 696 WARN_ON(ppgtt->pd_offset & 0x3f); 697 pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm + 698 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t); 699 for (i = 0; i < ppgtt->num_pd_entries; i++) { 700 dma_addr_t pt_addr; 701 702 pt_addr = ppgtt->pt_dma_addr[i]; 703 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); 704 pd_entry |= GEN6_PDE_VALID; 705 706 writel(pd_entry, pd_addr + i); 707 } 708 readl(pd_addr); 709 } 710 711 static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) 712 { 713 BUG_ON(ppgtt->pd_offset & 0x3f); 714 715 return (ppgtt->pd_offset / 64) << 16; 716 } 717 718 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, 719 struct intel_engine_cs *ring, 720 bool synchronous) 721 { 722 
struct drm_device *dev = ppgtt->base.dev; 723 struct drm_i915_private *dev_priv = dev->dev_private; 724 int ret; 725 726 /* If we're in reset, we can assume the GPU is sufficiently idle to 727 * manually frob these bits. Ideally we could use the ring functions, 728 * except our error handling makes it quite difficult (can't use 729 * intel_ring_begin, ring->flush, or intel_ring_advance) 730 * 731 * FIXME: We should try not to special case reset 732 */ 733 if (synchronous || 734 i915_reset_in_progress(&dev_priv->gpu_error)) { 735 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); 736 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 737 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 738 POSTING_READ(RING_PP_DIR_BASE(ring)); 739 return 0; 740 } 741 742 /* NB: TLBs must be flushed and invalidated before a switch */ 743 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 744 if (ret) 745 return ret; 746 747 ret = intel_ring_begin(ring, 6); 748 if (ret) 749 return ret; 750 751 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 752 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 753 intel_ring_emit(ring, PP_DIR_DCLV_2G); 754 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 755 intel_ring_emit(ring, get_pd_offset(ppgtt)); 756 intel_ring_emit(ring, MI_NOOP); 757 intel_ring_advance(ring); 758 759 return 0; 760 } 761 762 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, 763 struct intel_engine_cs *ring, 764 bool synchronous) 765 { 766 struct drm_device *dev = ppgtt->base.dev; 767 struct drm_i915_private *dev_priv = dev->dev_private; 768 int ret; 769 770 /* If we're in reset, we can assume the GPU is sufficiently idle to 771 * manually frob these bits. 
Ideally we could use the ring functions, 772 * except our error handling makes it quite difficult (can't use 773 * intel_ring_begin, ring->flush, or intel_ring_advance) 774 * 775 * FIXME: We should try not to special case reset 776 */ 777 if (synchronous || 778 i915_reset_in_progress(&dev_priv->gpu_error)) { 779 WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt); 780 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 781 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 782 POSTING_READ(RING_PP_DIR_BASE(ring)); 783 return 0; 784 } 785 786 /* NB: TLBs must be flushed and invalidated before a switch */ 787 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 788 if (ret) 789 return ret; 790 791 ret = intel_ring_begin(ring, 6); 792 if (ret) 793 return ret; 794 795 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); 796 intel_ring_emit(ring, RING_PP_DIR_DCLV(ring)); 797 intel_ring_emit(ring, PP_DIR_DCLV_2G); 798 intel_ring_emit(ring, RING_PP_DIR_BASE(ring)); 799 intel_ring_emit(ring, get_pd_offset(ppgtt)); 800 intel_ring_emit(ring, MI_NOOP); 801 intel_ring_advance(ring); 802 803 /* XXX: RCS is the only one to auto invalidate the TLBs? 
*/ 804 if (ring->id != RCS) { 805 ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); 806 if (ret) 807 return ret; 808 } 809 810 return 0; 811 } 812 813 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, 814 struct intel_engine_cs *ring, 815 bool synchronous) 816 { 817 struct drm_device *dev = ppgtt->base.dev; 818 struct drm_i915_private *dev_priv = dev->dev_private; 819 820 if (!synchronous) 821 return 0; 822 823 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 824 I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt)); 825 826 POSTING_READ(RING_PP_DIR_DCLV(ring)); 827 828 return 0; 829 } 830 831 static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 832 { 833 struct drm_device *dev = ppgtt->base.dev; 834 struct drm_i915_private *dev_priv = dev->dev_private; 835 struct intel_engine_cs *ring; 836 int j, ret; 837 838 for_each_ring(ring, dev_priv, j) { 839 I915_WRITE(RING_MODE_GEN7(ring), 840 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 841 842 /* We promise to do a switch later with FULL PPGTT. 
If this is 843 * aliasing, this is the one and only switch we'll do */ 844 if (USES_FULL_PPGTT(dev)) 845 continue; 846 847 ret = ppgtt->switch_mm(ppgtt, ring, true); 848 if (ret) 849 goto err_out; 850 } 851 852 return 0; 853 854 err_out: 855 for_each_ring(ring, dev_priv, j) 856 I915_WRITE(RING_MODE_GEN7(ring), 857 _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); 858 return ret; 859 } 860 861 static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 862 { 863 struct drm_device *dev = ppgtt->base.dev; 864 struct drm_i915_private *dev_priv = dev->dev_private; 865 struct intel_engine_cs *ring; 866 uint32_t ecochk, ecobits; 867 int i; 868 869 ecobits = I915_READ(GAC_ECO_BITS); 870 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); 871 872 ecochk = I915_READ(GAM_ECOCHK); 873 if (IS_HASWELL(dev)) { 874 ecochk |= ECOCHK_PPGTT_WB_HSW; 875 } else { 876 ecochk |= ECOCHK_PPGTT_LLC_IVB; 877 ecochk &= ~ECOCHK_PPGTT_GFDT_IVB; 878 } 879 I915_WRITE(GAM_ECOCHK, ecochk); 880 881 for_each_ring(ring, dev_priv, i) { 882 int ret; 883 /* GFX_MODE is per-ring on gen7+ */ 884 I915_WRITE(RING_MODE_GEN7(ring), 885 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 886 887 /* We promise to do a switch later with FULL PPGTT. 
If this is 888 * aliasing, this is the one and only switch we'll do */ 889 if (USES_FULL_PPGTT(dev)) 890 continue; 891 892 ret = ppgtt->switch_mm(ppgtt, ring, true); 893 if (ret) 894 return ret; 895 } 896 897 return 0; 898 } 899 900 static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) 901 { 902 struct drm_device *dev = ppgtt->base.dev; 903 struct drm_i915_private *dev_priv = dev->dev_private; 904 struct intel_engine_cs *ring; 905 uint32_t ecochk, gab_ctl, ecobits; 906 int i; 907 908 ecobits = I915_READ(GAC_ECO_BITS); 909 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | 910 ECOBITS_PPGTT_CACHE64B); 911 912 gab_ctl = I915_READ(GAB_CTL); 913 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); 914 915 ecochk = I915_READ(GAM_ECOCHK); 916 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); 917 918 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); 919 920 for_each_ring(ring, dev_priv, i) { 921 int ret = ppgtt->switch_mm(ppgtt, ring, true); 922 if (ret) 923 return ret; 924 } 925 926 return 0; 927 } 928 929 /* PPGTT support for Sandybdrige/Gen6 and later */ 930 static void gen6_ppgtt_clear_range(struct i915_address_space *vm, 931 uint64_t start, 932 uint64_t length, 933 bool use_scratch) 934 { 935 struct i915_hw_ppgtt *ppgtt = 936 container_of(vm, struct i915_hw_ppgtt, base); 937 gen6_gtt_pte_t *pt_vaddr, scratch_pte; 938 unsigned first_entry = start >> PAGE_SHIFT; 939 unsigned num_entries = length >> PAGE_SHIFT; 940 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 941 unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; 942 unsigned last_pte, i; 943 944 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); 945 946 while (num_entries) { 947 last_pte = first_pte + num_entries; 948 if (last_pte > I915_PPGTT_PT_ENTRIES) 949 last_pte = I915_PPGTT_PT_ENTRIES; 950 951 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 952 953 for (i = first_pte; i < last_pte; i++) 954 pt_vaddr[i] = scratch_pte; 955 956 
kunmap_atomic(pt_vaddr); 957 958 num_entries -= last_pte - first_pte; 959 first_pte = 0; 960 act_pt++; 961 } 962 } 963 964 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, 965 struct sg_table *pages, 966 uint64_t start, 967 enum i915_cache_level cache_level) 968 { 969 struct i915_hw_ppgtt *ppgtt = 970 container_of(vm, struct i915_hw_ppgtt, base); 971 gen6_gtt_pte_t *pt_vaddr; 972 unsigned first_entry = start >> PAGE_SHIFT; 973 unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; 974 unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; 975 struct sg_page_iter sg_iter; 976 977 pt_vaddr = NULL; 978 for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { 979 if (pt_vaddr == NULL) 980 pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); 981 982 pt_vaddr[act_pte] = 983 vm->pte_encode(sg_page_iter_dma_address(&sg_iter), 984 cache_level, true); 985 if (++act_pte == I915_PPGTT_PT_ENTRIES) { 986 kunmap_atomic(pt_vaddr); 987 pt_vaddr = NULL; 988 act_pt++; 989 act_pte = 0; 990 } 991 } 992 if (pt_vaddr) 993 kunmap_atomic(pt_vaddr); 994 } 995 996 static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 997 { 998 int i; 999 1000 if (ppgtt->pt_dma_addr) { 1001 for (i = 0; i < ppgtt->num_pd_entries; i++) 1002 pci_unmap_page(ppgtt->base.dev->pdev, 1003 ppgtt->pt_dma_addr[i], 1004 4096, PCI_DMA_BIDIRECTIONAL); 1005 } 1006 } 1007 1008 static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) 1009 { 1010 int i; 1011 1012 kfree(ppgtt->pt_dma_addr); 1013 for (i = 0; i < ppgtt->num_pd_entries; i++) 1014 __free_page(ppgtt->pt_pages[i]); 1015 kfree(ppgtt->pt_pages); 1016 } 1017 1018 static void gen6_ppgtt_cleanup(struct i915_address_space *vm) 1019 { 1020 struct i915_hw_ppgtt *ppgtt = 1021 container_of(vm, struct i915_hw_ppgtt, base); 1022 1023 list_del(&vm->global_link); 1024 drm_mm_takedown(&ppgtt->base.mm); 1025 drm_mm_remove_node(&ppgtt->node); 1026 1027 gen6_ppgtt_unmap_pages(ppgtt); 1028 gen6_ppgtt_free(ppgtt); 1029 } 1030 1031 static int 
gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) 1032 { 1033 struct drm_device *dev = ppgtt->base.dev; 1034 struct drm_i915_private *dev_priv = dev->dev_private; 1035 bool retried = false; 1036 int ret; 1037 1038 /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The 1039 * allocator works in address space sizes, so it's multiplied by page 1040 * size. We allocate at the top of the GTT to avoid fragmentation. 1041 */ 1042 BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm)); 1043 alloc: 1044 ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm, 1045 &ppgtt->node, GEN6_PD_SIZE, 1046 GEN6_PD_ALIGN, 0, 1047 0, dev_priv->gtt.base.total, 1048 DRM_MM_TOPDOWN); 1049 if (ret == -ENOSPC && !retried) { 1050 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 1051 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1052 I915_CACHE_NONE, 1053 0, dev_priv->gtt.base.total, 1054 0); 1055 if (ret) 1056 return ret; 1057 1058 retried = true; 1059 goto alloc; 1060 } 1061 1062 if (ppgtt->node.start < dev_priv->gtt.mappable_end) 1063 DRM_DEBUG("Forced to use aperture for PDEs\n"); 1064 1065 ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES; 1066 return ret; 1067 } 1068 1069 static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) 1070 { 1071 int i; 1072 1073 ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *), 1074 GFP_KERNEL); 1075 1076 if (!ppgtt->pt_pages) 1077 return -ENOMEM; 1078 1079 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1080 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL); 1081 if (!ppgtt->pt_pages[i]) { 1082 gen6_ppgtt_free(ppgtt); 1083 return -ENOMEM; 1084 } 1085 } 1086 1087 return 0; 1088 } 1089 1090 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) 1091 { 1092 int ret; 1093 1094 ret = gen6_ppgtt_allocate_page_directories(ppgtt); 1095 if (ret) 1096 return ret; 1097 1098 ret = gen6_ppgtt_allocate_page_tables(ppgtt); 1099 if (ret) { 1100 drm_mm_remove_node(&ppgtt->node); 1101 return ret; 1102 } 1103 1104 
ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t), 1105 GFP_KERNEL); 1106 if (!ppgtt->pt_dma_addr) { 1107 drm_mm_remove_node(&ppgtt->node); 1108 gen6_ppgtt_free(ppgtt); 1109 return -ENOMEM; 1110 } 1111 1112 return 0; 1113 } 1114 1115 static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) 1116 { 1117 struct drm_device *dev = ppgtt->base.dev; 1118 int i; 1119 1120 for (i = 0; i < ppgtt->num_pd_entries; i++) { 1121 dma_addr_t pt_addr; 1122 1123 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096, 1124 PCI_DMA_BIDIRECTIONAL); 1125 1126 if (pci_dma_mapping_error(dev->pdev, pt_addr)) { 1127 gen6_ppgtt_unmap_pages(ppgtt); 1128 return -EIO; 1129 } 1130 1131 ppgtt->pt_dma_addr[i] = pt_addr; 1132 } 1133 1134 return 0; 1135 } 1136 1137 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) 1138 { 1139 struct drm_device *dev = ppgtt->base.dev; 1140 struct drm_i915_private *dev_priv = dev->dev_private; 1141 int ret; 1142 1143 ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; 1144 if (IS_GEN6(dev)) { 1145 ppgtt->enable = gen6_ppgtt_enable; 1146 ppgtt->switch_mm = gen6_mm_switch; 1147 } else if (IS_HASWELL(dev)) { 1148 ppgtt->enable = gen7_ppgtt_enable; 1149 ppgtt->switch_mm = hsw_mm_switch; 1150 } else if (IS_GEN7(dev)) { 1151 ppgtt->enable = gen7_ppgtt_enable; 1152 ppgtt->switch_mm = gen7_mm_switch; 1153 } else 1154 BUG(); 1155 1156 ret = gen6_ppgtt_alloc(ppgtt); 1157 if (ret) 1158 return ret; 1159 1160 ret = gen6_ppgtt_setup_page_tables(ppgtt); 1161 if (ret) { 1162 gen6_ppgtt_free(ppgtt); 1163 return ret; 1164 } 1165 1166 ppgtt->base.clear_range = gen6_ppgtt_clear_range; 1167 ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; 1168 ppgtt->base.cleanup = gen6_ppgtt_cleanup; 1169 ppgtt->base.start = 0; 1170 ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; 1171 ppgtt->debug_dump = gen6_dump_ppgtt; 1172 1173 ppgtt->pd_offset = 1174 ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t); 1175 1176 
ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); 1177 1178 DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", 1179 ppgtt->node.size >> 20, 1180 ppgtt->node.start / PAGE_SIZE); 1181 1182 return 0; 1183 } 1184 1185 int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) 1186 { 1187 struct drm_i915_private *dev_priv = dev->dev_private; 1188 int ret = 0; 1189 1190 ppgtt->base.dev = dev; 1191 ppgtt->base.scratch = dev_priv->gtt.base.scratch; 1192 1193 if (INTEL_INFO(dev)->gen < 8) 1194 ret = gen6_ppgtt_init(ppgtt); 1195 else if (IS_GEN8(dev)) 1196 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); 1197 else 1198 BUG(); 1199 1200 if (!ret) { 1201 struct drm_i915_private *dev_priv = dev->dev_private; 1202 kref_init(&ppgtt->ref); 1203 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, 1204 ppgtt->base.total); 1205 i915_init_vm(dev_priv, &ppgtt->base); 1206 if (INTEL_INFO(dev)->gen < 8) { 1207 gen6_write_pdes(ppgtt); 1208 DRM_DEBUG("Adding PPGTT at offset %x\n", 1209 ppgtt->pd_offset << 10); 1210 } 1211 } 1212 1213 return ret; 1214 } 1215 1216 static void 1217 ppgtt_bind_vma(struct i915_vma *vma, 1218 enum i915_cache_level cache_level, 1219 u32 flags) 1220 { 1221 vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, 1222 cache_level); 1223 } 1224 1225 static void ppgtt_unbind_vma(struct i915_vma *vma) 1226 { 1227 vma->vm->clear_range(vma->vm, 1228 vma->node.start, 1229 vma->obj->base.size, 1230 true); 1231 } 1232 1233 extern int intel_iommu_gfx_mapped; 1234 /* Certain Gen5 chipsets require require idling the GPU before 1235 * unmapping anything from the GTT when VT-d is enabled. 1236 */ 1237 static inline bool needs_idle_maps(struct drm_device *dev) 1238 { 1239 #ifdef CONFIG_INTEL_IOMMU 1240 /* Query intel_iommu to see if we need the workaround. Presumably that 1241 * was loaded first. 
1242 */ 1243 if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped) 1244 return true; 1245 #endif 1246 return false; 1247 } 1248 1249 static bool do_idling(struct drm_i915_private *dev_priv) 1250 { 1251 bool ret = dev_priv->mm.interruptible; 1252 1253 if (unlikely(dev_priv->gtt.do_idle_maps)) { 1254 dev_priv->mm.interruptible = false; 1255 if (i915_gpu_idle(dev_priv->dev)) { 1256 DRM_ERROR("Couldn't idle GPU\n"); 1257 /* Wait a bit, in hopes it avoids the hang */ 1258 udelay(10); 1259 } 1260 } 1261 1262 return ret; 1263 } 1264 1265 static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) 1266 { 1267 if (unlikely(dev_priv->gtt.do_idle_maps)) 1268 dev_priv->mm.interruptible = interruptible; 1269 } 1270 1271 void i915_check_and_clear_faults(struct drm_device *dev) 1272 { 1273 struct drm_i915_private *dev_priv = dev->dev_private; 1274 struct intel_engine_cs *ring; 1275 int i; 1276 1277 if (INTEL_INFO(dev)->gen < 6) 1278 return; 1279 1280 for_each_ring(ring, dev_priv, i) { 1281 u32 fault_reg; 1282 fault_reg = I915_READ(RING_FAULT_REG(ring)); 1283 if (fault_reg & RING_FAULT_VALID) { 1284 DRM_DEBUG_DRIVER("Unexpected fault\n" 1285 "\tAddr: 0x%08lx\\n" 1286 "\tAddress space: %s\n" 1287 "\tSource ID: %d\n" 1288 "\tType: %d\n", 1289 fault_reg & PAGE_MASK, 1290 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT", 1291 RING_FAULT_SRCID(fault_reg), 1292 RING_FAULT_FAULT_TYPE(fault_reg)); 1293 I915_WRITE(RING_FAULT_REG(ring), 1294 fault_reg & ~RING_FAULT_VALID); 1295 } 1296 } 1297 POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS])); 1298 } 1299 1300 void i915_gem_suspend_gtt_mappings(struct drm_device *dev) 1301 { 1302 struct drm_i915_private *dev_priv = dev->dev_private; 1303 1304 /* Don't bother messing with faults pre GEN6 as we have little 1305 * documentation supporting that it's a good idea. 
1306 */ 1307 if (INTEL_INFO(dev)->gen < 6) 1308 return; 1309 1310 i915_check_and_clear_faults(dev); 1311 1312 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1313 dev_priv->gtt.base.start, 1314 dev_priv->gtt.base.total, 1315 true); 1316 } 1317 1318 void i915_gem_restore_gtt_mappings(struct drm_device *dev) 1319 { 1320 struct drm_i915_private *dev_priv = dev->dev_private; 1321 struct drm_i915_gem_object *obj; 1322 struct i915_address_space *vm; 1323 1324 i915_check_and_clear_faults(dev); 1325 1326 /* First fill our portion of the GTT with scratch pages */ 1327 dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, 1328 dev_priv->gtt.base.start, 1329 dev_priv->gtt.base.total, 1330 true); 1331 1332 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1333 struct i915_vma *vma = i915_gem_obj_to_vma(obj, 1334 &dev_priv->gtt.base); 1335 if (!vma) 1336 continue; 1337 1338 i915_gem_clflush_object(obj, obj->pin_display); 1339 /* The bind_vma code tries to be smart about tracking mappings. 1340 * Unfortunately above, we've just wiped out the mappings 1341 * without telling our object about it. So we need to fake it. 
1342 */ 1343 obj->has_global_gtt_mapping = 0; 1344 vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND); 1345 } 1346 1347 1348 if (INTEL_INFO(dev)->gen >= 8) { 1349 if (IS_CHERRYVIEW(dev)) 1350 chv_setup_private_ppat(dev_priv); 1351 else 1352 bdw_setup_private_ppat(dev_priv); 1353 1354 return; 1355 } 1356 1357 list_for_each_entry(vm, &dev_priv->vm_list, global_link) { 1358 /* TODO: Perhaps it shouldn't be gen6 specific */ 1359 if (i915_is_ggtt(vm)) { 1360 if (dev_priv->mm.aliasing_ppgtt) 1361 gen6_write_pdes(dev_priv->mm.aliasing_ppgtt); 1362 continue; 1363 } 1364 1365 gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base)); 1366 } 1367 1368 i915_gem_chipset_flush(dev); 1369 } 1370 1371 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) 1372 { 1373 if (obj->has_dma_mapping) 1374 return 0; 1375 1376 if (!dma_map_sg(&obj->base.dev->pdev->dev, 1377 obj->pages->sgl, obj->pages->nents, 1378 PCI_DMA_BIDIRECTIONAL)) 1379 return -ENOSPC; 1380 1381 return 0; 1382 } 1383 1384 static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) 1385 { 1386 #ifdef writeq 1387 writeq(pte, addr); 1388 #else 1389 iowrite32((u32)pte, addr); 1390 iowrite32(pte >> 32, addr + 4); 1391 #endif 1392 } 1393 1394 static void gen8_ggtt_insert_entries(struct i915_address_space *vm, 1395 struct sg_table *st, 1396 uint64_t start, 1397 enum i915_cache_level level) 1398 { 1399 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1400 unsigned first_entry = start >> PAGE_SHIFT; 1401 gen8_gtt_pte_t __iomem *gtt_entries = 1402 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1403 int i = 0; 1404 struct sg_page_iter sg_iter; 1405 dma_addr_t addr = 0; 1406 1407 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1408 addr = sg_dma_address(sg_iter.sg) + 1409 (sg_iter.sg_pgoffset << PAGE_SHIFT); 1410 gen8_set_pte(>t_entries[i], 1411 gen8_pte_encode(addr, level, true)); 1412 i++; 1413 } 1414 1415 /* 1416 * XXX: This serves as a posting read to make sure that the PTE 
has 1417 * actually been updated. There is some concern that even though 1418 * registers and PTEs are within the same BAR that they are potentially 1419 * of NUMA access patterns. Therefore, even with the way we assume 1420 * hardware should work, we must keep this posting read for paranoia. 1421 */ 1422 if (i != 0) 1423 WARN_ON(readq(>t_entries[i-1]) 1424 != gen8_pte_encode(addr, level, true)); 1425 1426 /* This next bit makes the above posting read even more important. We 1427 * want to flush the TLBs only after we're certain all the PTE updates 1428 * have finished. 1429 */ 1430 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1431 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1432 } 1433 1434 /* 1435 * Binds an object into the global gtt with the specified cache level. The object 1436 * will be accessible to the GPU via commands whose operands reference offsets 1437 * within the global GTT as well as accessible by the GPU through the GMADR 1438 * mapped BAR (dev_priv->mm.gtt->gtt). 1439 */ 1440 static void gen6_ggtt_insert_entries(struct i915_address_space *vm, 1441 struct sg_table *st, 1442 uint64_t start, 1443 enum i915_cache_level level) 1444 { 1445 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1446 unsigned first_entry = start >> PAGE_SHIFT; 1447 gen6_gtt_pte_t __iomem *gtt_entries = 1448 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; 1449 int i = 0; 1450 struct sg_page_iter sg_iter; 1451 dma_addr_t addr; 1452 1453 for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { 1454 addr = sg_page_iter_dma_address(&sg_iter); 1455 iowrite32(vm->pte_encode(addr, level, true), >t_entries[i]); 1456 i++; 1457 } 1458 1459 /* XXX: This serves as a posting read to make sure that the PTE has 1460 * actually been updated. There is some concern that even though 1461 * registers and PTEs are within the same BAR that they are potentially 1462 * of NUMA access patterns. 
Therefore, even with the way we assume 1463 * hardware should work, we must keep this posting read for paranoia. 1464 */ 1465 if (i != 0) 1466 WARN_ON(readl(>t_entries[i-1]) != 1467 vm->pte_encode(addr, level, true)); 1468 1469 /* This next bit makes the above posting read even more important. We 1470 * want to flush the TLBs only after we're certain all the PTE updates 1471 * have finished. 1472 */ 1473 I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); 1474 POSTING_READ(GFX_FLSH_CNTL_GEN6); 1475 } 1476 1477 static void gen8_ggtt_clear_range(struct i915_address_space *vm, 1478 uint64_t start, 1479 uint64_t length, 1480 bool use_scratch) 1481 { 1482 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1483 unsigned first_entry = start >> PAGE_SHIFT; 1484 unsigned num_entries = length >> PAGE_SHIFT; 1485 gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = 1486 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1487 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1488 int i; 1489 1490 if (WARN(num_entries > max_entries, 1491 "First entry = %d; Num entries = %d (max=%d)\n", 1492 first_entry, num_entries, max_entries)) 1493 num_entries = max_entries; 1494 1495 scratch_pte = gen8_pte_encode(vm->scratch.addr, 1496 I915_CACHE_LLC, 1497 use_scratch); 1498 for (i = 0; i < num_entries; i++) 1499 gen8_set_pte(>t_base[i], scratch_pte); 1500 readl(gtt_base); 1501 } 1502 1503 static void gen6_ggtt_clear_range(struct i915_address_space *vm, 1504 uint64_t start, 1505 uint64_t length, 1506 bool use_scratch) 1507 { 1508 struct drm_i915_private *dev_priv = vm->dev->dev_private; 1509 unsigned first_entry = start >> PAGE_SHIFT; 1510 unsigned num_entries = length >> PAGE_SHIFT; 1511 gen6_gtt_pte_t scratch_pte, __iomem *gtt_base = 1512 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; 1513 const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; 1514 int i; 1515 1516 if (WARN(num_entries > max_entries, 1517 "First entry = %d; Num 
entries = %d (max=%d)\n", 1518 first_entry, num_entries, max_entries)) 1519 num_entries = max_entries; 1520 1521 scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch); 1522 1523 for (i = 0; i < num_entries; i++) 1524 iowrite32(scratch_pte, >t_base[i]); 1525 readl(gtt_base); 1526 } 1527 1528 1529 static void i915_ggtt_bind_vma(struct i915_vma *vma, 1530 enum i915_cache_level cache_level, 1531 u32 unused) 1532 { 1533 const unsigned long entry = vma->node.start >> PAGE_SHIFT; 1534 unsigned int flags = (cache_level == I915_CACHE_NONE) ? 1535 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; 1536 1537 BUG_ON(!i915_is_ggtt(vma->vm)); 1538 intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags); 1539 vma->obj->has_global_gtt_mapping = 1; 1540 } 1541 1542 static void i915_ggtt_clear_range(struct i915_address_space *vm, 1543 uint64_t start, 1544 uint64_t length, 1545 bool unused) 1546 { 1547 unsigned first_entry = start >> PAGE_SHIFT; 1548 unsigned num_entries = length >> PAGE_SHIFT; 1549 intel_gtt_clear_range(first_entry, num_entries); 1550 } 1551 1552 static void i915_ggtt_unbind_vma(struct i915_vma *vma) 1553 { 1554 const unsigned int first = vma->node.start >> PAGE_SHIFT; 1555 const unsigned int size = vma->obj->base.size >> PAGE_SHIFT; 1556 1557 BUG_ON(!i915_is_ggtt(vma->vm)); 1558 vma->obj->has_global_gtt_mapping = 0; 1559 intel_gtt_clear_range(first, size); 1560 } 1561 1562 static void ggtt_bind_vma(struct i915_vma *vma, 1563 enum i915_cache_level cache_level, 1564 u32 flags) 1565 { 1566 struct drm_device *dev = vma->vm->dev; 1567 struct drm_i915_private *dev_priv = dev->dev_private; 1568 struct drm_i915_gem_object *obj = vma->obj; 1569 1570 /* If there is no aliasing PPGTT, or the caller needs a global mapping, 1571 * or we have a global mapping already but the cacheability flags have 1572 * changed, set the global PTEs. 1573 * 1574 * If there is an aliasing PPGTT it is anecdotally faster, so use that 1575 * instead if none of the above hold true. 
1576 * 1577 * NB: A global mapping should only be needed for special regions like 1578 * "gtt mappable", SNB errata, or if specified via special execbuf 1579 * flags. At all other times, the GPU will use the aliasing PPGTT. 1580 */ 1581 if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) { 1582 if (!obj->has_global_gtt_mapping || 1583 (cache_level != obj->cache_level)) { 1584 vma->vm->insert_entries(vma->vm, obj->pages, 1585 vma->node.start, 1586 cache_level); 1587 obj->has_global_gtt_mapping = 1; 1588 } 1589 } 1590 1591 if (dev_priv->mm.aliasing_ppgtt && 1592 (!obj->has_aliasing_ppgtt_mapping || 1593 (cache_level != obj->cache_level))) { 1594 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1595 appgtt->base.insert_entries(&appgtt->base, 1596 vma->obj->pages, 1597 vma->node.start, 1598 cache_level); 1599 vma->obj->has_aliasing_ppgtt_mapping = 1; 1600 } 1601 } 1602 1603 static void ggtt_unbind_vma(struct i915_vma *vma) 1604 { 1605 struct drm_device *dev = vma->vm->dev; 1606 struct drm_i915_private *dev_priv = dev->dev_private; 1607 struct drm_i915_gem_object *obj = vma->obj; 1608 1609 if (obj->has_global_gtt_mapping) { 1610 vma->vm->clear_range(vma->vm, 1611 vma->node.start, 1612 obj->base.size, 1613 true); 1614 obj->has_global_gtt_mapping = 0; 1615 } 1616 1617 if (obj->has_aliasing_ppgtt_mapping) { 1618 struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt; 1619 appgtt->base.clear_range(&appgtt->base, 1620 vma->node.start, 1621 obj->base.size, 1622 true); 1623 obj->has_aliasing_ppgtt_mapping = 0; 1624 } 1625 } 1626 1627 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) 1628 { 1629 struct drm_device *dev = obj->base.dev; 1630 struct drm_i915_private *dev_priv = dev->dev_private; 1631 bool interruptible; 1632 1633 interruptible = do_idling(dev_priv); 1634 1635 if (!obj->has_dma_mapping) 1636 dma_unmap_sg(&dev->pdev->dev, 1637 obj->pages->sgl, obj->pages->nents, 1638 PCI_DMA_BIDIRECTIONAL); 1639 1640 undo_idling(dev_priv, 
interruptible); 1641 } 1642 1643 static void i915_gtt_color_adjust(struct drm_mm_node *node, 1644 unsigned long color, 1645 unsigned long *start, 1646 unsigned long *end) 1647 { 1648 if (node->color != color) 1649 *start += 4096; 1650 1651 if (!list_empty(&node->node_list)) { 1652 node = list_entry(node->node_list.next, 1653 struct drm_mm_node, 1654 node_list); 1655 if (node->allocated && node->color != color) 1656 *end -= 4096; 1657 } 1658 } 1659 1660 void i915_gem_setup_global_gtt(struct drm_device *dev, 1661 unsigned long start, 1662 unsigned long mappable_end, 1663 unsigned long end) 1664 { 1665 /* Let GEM Manage all of the aperture. 1666 * 1667 * However, leave one page at the end still bound to the scratch page. 1668 * There are a number of places where the hardware apparently prefetches 1669 * past the end of the object, and we've seen multiple hangs with the 1670 * GPU head pointer stuck in a batchbuffer bound at the last page of the 1671 * aperture. One page should be enough to keep any prefetching inside 1672 * of the aperture. 1673 */ 1674 struct drm_i915_private *dev_priv = dev->dev_private; 1675 struct i915_address_space *ggtt_vm = &dev_priv->gtt.base; 1676 struct drm_mm_node *entry; 1677 struct drm_i915_gem_object *obj; 1678 unsigned long hole_start, hole_end; 1679 1680 BUG_ON(mappable_end > end); 1681 1682 /* Subtract the guard page ... 
*/ 1683 drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE); 1684 if (!HAS_LLC(dev)) 1685 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust; 1686 1687 /* Mark any preallocated objects as occupied */ 1688 list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { 1689 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); 1690 int ret; 1691 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", 1692 i915_gem_obj_ggtt_offset(obj), obj->base.size); 1693 1694 WARN_ON(i915_gem_obj_ggtt_bound(obj)); 1695 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); 1696 if (ret) 1697 DRM_DEBUG_KMS("Reservation failed\n"); 1698 obj->has_global_gtt_mapping = 1; 1699 } 1700 1701 dev_priv->gtt.base.start = start; 1702 dev_priv->gtt.base.total = end - start; 1703 1704 /* Clear any non-preallocated blocks */ 1705 drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) { 1706 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", 1707 hole_start, hole_end); 1708 ggtt_vm->clear_range(ggtt_vm, hole_start, 1709 hole_end - hole_start, true); 1710 } 1711 1712 /* And finally clear the reserved guard page */ 1713 ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); 1714 } 1715 1716 void i915_gem_init_global_gtt(struct drm_device *dev) 1717 { 1718 struct drm_i915_private *dev_priv = dev->dev_private; 1719 unsigned long gtt_size, mappable_size; 1720 1721 gtt_size = dev_priv->gtt.base.total; 1722 mappable_size = dev_priv->gtt.mappable_end; 1723 1724 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); 1725 } 1726 1727 static int setup_scratch_page(struct drm_device *dev) 1728 { 1729 struct drm_i915_private *dev_priv = dev->dev_private; 1730 struct page *page; 1731 dma_addr_t dma_addr; 1732 1733 page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); 1734 if (page == NULL) 1735 return -ENOMEM; 1736 get_page(page); 1737 set_pages_uc(page, 1); 1738 1739 #ifdef CONFIG_INTEL_IOMMU 1740 dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, 1741 
PCI_DMA_BIDIRECTIONAL); 1742 if (pci_dma_mapping_error(dev->pdev, dma_addr)) 1743 return -EINVAL; 1744 #else 1745 dma_addr = page_to_phys(page); 1746 #endif 1747 dev_priv->gtt.base.scratch.page = page; 1748 dev_priv->gtt.base.scratch.addr = dma_addr; 1749 1750 return 0; 1751 } 1752 1753 static void teardown_scratch_page(struct drm_device *dev) 1754 { 1755 struct drm_i915_private *dev_priv = dev->dev_private; 1756 struct page *page = dev_priv->gtt.base.scratch.page; 1757 1758 set_pages_wb(page, 1); 1759 pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr, 1760 PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 1761 put_page(page); 1762 __free_page(page); 1763 } 1764 1765 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) 1766 { 1767 snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; 1768 snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; 1769 return snb_gmch_ctl << 20; 1770 } 1771 1772 static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) 1773 { 1774 bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; 1775 bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; 1776 if (bdw_gmch_ctl) 1777 bdw_gmch_ctl = 1 << bdw_gmch_ctl; 1778 1779 #ifdef CONFIG_X86_32 1780 /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */ 1781 if (bdw_gmch_ctl > 4) 1782 bdw_gmch_ctl = 4; 1783 #endif 1784 1785 return bdw_gmch_ctl << 20; 1786 } 1787 1788 static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) 1789 { 1790 gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; 1791 gmch_ctrl &= SNB_GMCH_GGMS_MASK; 1792 1793 if (gmch_ctrl) 1794 return 1 << (20 + gmch_ctrl); 1795 1796 return 0; 1797 } 1798 1799 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) 1800 { 1801 snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; 1802 snb_gmch_ctl &= SNB_GMCH_GMS_MASK; 1803 return snb_gmch_ctl << 25; /* 32 MB units */ 1804 } 1805 1806 static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) 1807 { 1808 bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; 1809 bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; 1810 return bdw_gmch_ctl << 25; /* 32 MB units */ 1811 } 1812 
/* CHV stolen-memory size decode: piecewise encoding of the GMS field. */
static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0 to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments start at 36MB
	 */
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}

/*
 * Shared gen6+ GGTT probe tail: map the GSM (the PTE half of BAR 0)
 * write-combined and set up the scratch page.  On scratch failure the
 * GSM mapping is unwound.  Returns 0 or a negative errno.
 */
static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * Note that the hardware enforces snooping for all page
	 * table accesses. The snoop bit is actually ignored for
	 * PDEs.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

/*
 * gen8 (BDW/CHV) GGTT probe: read sizes from GMCH_CTRL, set the DMA
 * mask, program the PPAT and finish via ggtt_probe_common.  Outputs go
 * through the pointer parameters; the vtable is wired at the end.
 */
static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	/* gtt_size is bytes of PTE space; convert to addressable bytes */
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}

/*
 * gen6/7 GGTT probe: sanity-check the GMADR aperture, set the 40-bit
 * DMA mask, decode sizes from GMCH_CTRL and finish via
 * ggtt_probe_common.
 */
static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

/* Teardown for gen6+ GGTT: drop the drm_mm, unmap GSM, free scratch. */
static void gen6_gmch_remove(struct i915_address_space *vm)
{

	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

/*
 * Pre-gen6 probe: defer to the intel-gtt (GMCH) driver and query it for
 * sizes; also decide whether the Ironlake VT-d idle workaround applies.
 */
static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

/* Pre-gen6 teardown: drop the drm_mm and release the GMCH driver. */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	if (drm_mm_initialized(&vm->mm)) {
		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}
	intel_gmch_remove();
}

/*
 * Top-level GTT init: select per-generation probe/cleanup/pte_encode
 * hooks, run the probe, log the discovered sizes and sanitize the
 * i915.enable_ppgtt module parameter.  Returns 0 or a negative errno.
 */
int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		/* HSW with eDRAM ("Iris") gets its own PTE encoding */
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities. We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

/*
 * Allocate a VMA for (obj, vm) and wire bind/unbind hooks by hardware
 * generation and address-space kind.  The VMA is linked into the
 * object's vma_list (GGTT VMAs first, for easier debugging).
 * Returns the new VMA or ERR_PTR(-ENOMEM).
 */
static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		/* Pre-gen6 only has the global GTT */
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

/* Return the existing VMA for (obj, vm), creating one if absent. */
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}