/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv);

bool intel_enable_ppgtt(struct drm_device *dev, bool full)
{
	if (i915.enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
		return false;

	if (i915.enable_ppgtt == 1 && full)
		return false;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return false;
	}
#endif

	/* Full ppgtt disabled by default for now due to issues. */
	if (full)
		return false; /* HAS_PPGTT(dev) */
	else
		return HAS_ALIASING_PPGTT(dev);
}

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)

/* Cacheability Control is a 4-bit value. The low three bits are stored in
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
 */
#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
					 (((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
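
/* Illustrative note: for example, the 4-bit index 0xb used by
 * HSW_WB_ELLC_LLC_AGE0 is split by HSW_CACHEABILITY_CONTROL() into
 * (0xb & 0x7) << 1 = 0x6 for PTE bits 3:1 and (0xb & 0x8) << 8 = 0x800 for
 * PTE bit 11, giving an encoded value of 0x806.
 */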

#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))

/* GEN8 legacy style address is defined as a 3 level page table:
 * 31:30 | 29:21 | 20:12 |  11:0
 * PDPE  |  PDE  |  PTE  | offset
 * The difference as compared to normal x86 3 level page table is the PDPEs are
 * programmed via register.
 */
#define GEN8_PDPE_SHIFT			30
#define GEN8_PDPE_MASK			0x3
#define GEN8_PDE_SHIFT			21
#define GEN8_PDE_MASK			0x1ff
#define GEN8_PTE_SHIFT			12
#define GEN8_PTE_MASK			0x1ff

#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
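
/* Illustrative note: with the shifts and masks above, a (hypothetical) PPGTT
 * offset of 0x40123000 decomposes as pdpe = (0x40123000 >> 30) & 0x3 = 1,
 * pde = (0x40123000 >> 21) & 0x1ff = 0 and pte = (0x40123000 >> 12) & 0x1ff =
 * 0x123, with the low 12 bits being the byte offset within the 4K page.
 */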

static void ppgtt_bind_vma(struct i915_vma *vma,
			   enum i915_cache_level cache_level,
			   u32 flags);
static void ppgtt_unbind_vma(struct i915_vma *vma);
static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);

static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
					     enum i915_cache_level level,
					     bool valid)
{
	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;
	if (level != I915_CACHE_NONE)
		pte |= PPAT_CACHED_INDEX;
	else
		pte |= PPAT_UNCACHED_INDEX;
	return pte;
}

static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
					       dma_addr_t addr,
					       enum i915_cache_level level)
{
	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

#define BYT_PTE_WRITEABLE		(1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level,
				     bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level,
				      bool valid)
{
	gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
			  uint64_t val, bool synchronous)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	BUG_ON(entry >= 4);

	if (synchronous) {
		I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
		I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
		return 0;
	}

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, (u32)(val >> 32));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, (u32)(val));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	int i, ret;

	/* bit of a hack to find the actual last used pd */
	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;

	for (i = used_pd - 1; i >= 0; i--) {
		dma_addr_t addr = ppgtt->pd_dma_addr[i];
		ret = gen8_write_pdp(ring, i, addr, synchronous);
		if (ret)
			return ret;
	}

	return 0;
}
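
/* Descriptive note: on gen8 the per-ring PDP0-3 registers are loaded either
 * directly via MMIO when the switch must be synchronous (e.g. during
 * reset/enable), or from the ring with MI_LOAD_REGISTER_IMM, each 64-bit page
 * directory address being split into the UDW/LDW register pair as done above.
 */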

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES_PER_PAGE)
			last_pte = GEN8_PTES_PER_PAGE;

		pt_vaddr = kmap_atomic(page_table);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		kunmap_atomic(pt_vaddr);

		pte = 0;
		if (++pde == GEN8_PDES_PER_PAGE) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_gtt_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;

	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPS))
			break;

		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->gen8_pt_pages[pdpe][pde]);

		pt_vaddr[pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++pte == GEN8_PTES_PER_PAGE) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == GEN8_PDES_PER_PAGE) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}

static void gen8_free_page_tables(struct page **pt_pages)
{
	int i;

	if (pt_pages == NULL)
		return;

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
		if (pt_pages[i])
			__free_pages(pt_pages[i], 0);
}

static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		gen8_free_page_tables(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_pages[i]);
		kfree(ppgtt->gen8_pt_dma_addr[i]);
	}

	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
}

static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
	int i, j;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		/* TODO: In the future we'll support sparse mappings, so this
		 * will have to change. */
		if (!ppgtt->pd_dma_addr[i])
			continue;

		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
			       PCI_DMA_BIDIRECTIONAL);

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			if (addr)
				pci_unmap_page(hwdev, addr, PAGE_SIZE,
					       PCI_DMA_BIDIRECTIONAL);
		}
	}
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&vm->mm);

	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
}

static struct page **__gen8_alloc_page_tables(void)
{
	struct page **pt_pages;
	int i;

	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
	if (!pt_pages)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
		pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!pt_pages[i])
			goto bail;
	}

	return pt_pages;

bail:
	gen8_free_page_tables(pt_pages);
	kfree(pt_pages);
	return ERR_PTR(-ENOMEM);
}

static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt,
					   const int max_pdp)
{
	struct page **pt_pages[GEN8_LEGACY_PDPS];
	int i, ret;

	for (i = 0; i < max_pdp; i++) {
		pt_pages[i] = __gen8_alloc_page_tables();
		if (IS_ERR(pt_pages[i])) {
			ret = PTR_ERR(pt_pages[i]);
			goto unwind_out;
		}
	}

	/* NB: Avoid touching gen8_pt_pages until last to keep the allocation,
	 * "atomic" - for cleanup purposes.
	 */
	for (i = 0; i < max_pdp; i++)
		ppgtt->gen8_pt_pages[i] = pt_pages[i];

	return 0;

unwind_out:
	while (i--) {
		gen8_free_page_tables(pt_pages[i]);
		kfree(pt_pages[i]);
	}

	return ret;
}

static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for (i = 0; i < ppgtt->num_pd_pages; i++) {
		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
						     sizeof(dma_addr_t),
						     GFP_KERNEL);
		if (!ppgtt->gen8_pt_dma_addr[i])
			return -ENOMEM;
	}

	return 0;
}

static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
						const int max_pdp)
{
	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
	if (!ppgtt->pd_pages)
		return -ENOMEM;

	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);

	return 0;
}

static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
			    const int max_pdp)
{
	int ret;

	ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
	if (ret)
		return ret;

	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
	if (ret) {
		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
		return ret;
	}

	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;

	ret = gen8_ppgtt_allocate_dma(ppgtt);
	if (ret)
		gen8_ppgtt_free(ppgtt);

	return ret;
}

static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
					     const int pd)
{
	dma_addr_t pd_addr;
	int ret;

	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
			       &ppgtt->pd_pages[pd], 0,
			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);

	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
	if (ret)
		return ret;

	ppgtt->pd_dma_addr[pd] = pd_addr;

	return 0;
}

static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
					const int pd,
					const int pt)
{
	dma_addr_t pt_addr;
	struct page *p;
	int ret;

	p = ppgtt->gen8_pt_pages[pd][pt];
	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
	if (ret)
		return ret;

	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;

	return 0;
}

/**
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 *
 * FIXME: split allocation into smaller pieces. For now we only ever do this
 * once, but with full PPGTT, the multiple contiguous allocations will be bad.
 * TODO: Do something with the size parameter
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
	int i, j, ret;

	if (size % (1<<30))
		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);

	/* 1. Do all our allocations for page directories and page tables. */
	ret = gen8_ppgtt_alloc(ppgtt, max_pdp);
	if (ret)
		return ret;

	/*
	 * 2. Create DMA mappings for the page directories and page tables.
	 */
	for (i = 0; i < max_pdp; i++) {
		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
		if (ret)
			goto bail;

		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
			if (ret)
				goto bail;
		}
	}

	/*
	 * 3. Map all the page directory entries to point to the page tables
	 * we've allocated.
	 *
	 * For now, the PPGTT helper functions all require that the PDEs are
	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
	 * will never need to touch the PDEs again.
	 */
	for (i = 0; i < max_pdp; i++) {
		gen8_ppgtt_pde_t *pd_vaddr;
		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
						      I915_CACHE_LLC);
		}
		kunmap_atomic(pd_vaddr);
	}

	ppgtt->enable = gen8_ppgtt_enable;
	ppgtt->switch_mm = gen8_mm_switch;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
			 ppgtt->num_pd_entries,
			 (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
	return 0;

bail:
	gen8_ppgtt_unmap_pages(ppgtt);
	gen8_ppgtt_free(ppgtt);
	return ret;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	struct i915_address_space *vm = &ppgtt->base;
	gen6_gtt_pte_t __iomem *pd_addr;
	gen6_gtt_pte_t scratch_pte;
	uint32_t pd_entry;
	int pte, pde;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);

	seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
		   ppgtt->pd_offset, ppgtt->pd_offset + ppgtt->num_pd_entries);
	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
		u32 expected;
		gen6_gtt_pte_t *pt_vaddr;
		dma_addr_t pt_addr = ppgtt->pt_dma_addr[pde];
		pd_entry = readl(pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, " SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_atomic(pt_vaddr);
	}
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd_offset & 0x3f);

	return (ppgtt->pd_offset / 64) << 16;
}
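
/* Illustrative note: PP_DIR_BASE takes the page directory offset in units of
 * 64 bytes (16 PDEs), shifted into bits 31:16. For example, a (hypothetical)
 * pd_offset of 0x10000 bytes would be programmed as (0x10000 / 64) << 16 =
 * 0x4000000.
 */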

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct intel_ring_buffer *ring,
			 bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	/* If we're in reset, we can assume the GPU is sufficiently idle to
	 * manually frob these bits. Ideally we could use the ring functions,
	 * except our error handling makes it quite difficult (can't use
	 * intel_ring_begin, ring->flush, or intel_ring_advance)
	 *
	 * FIXME: We should try not to special case reset
	 */
	if (synchronous ||
	    i915_reset_in_progress(&dev_priv->gpu_error)) {
		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
		POSTING_READ(RING_PP_DIR_BASE(ring));
		return 0;
	}

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct intel_ring_buffer *ring,
			  bool synchronous)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (!synchronous)
		return 0;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int j, ret;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			goto err_out;
	}

	return 0;

err_out:
	for_each_ring(ring, dev_priv, j)
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
	return ret;
}

static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		int ret;
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		/* We promise to do a switch later with FULL PPGTT. If this is
		 * aliasing, this is the one and only switch we'll do */
		if (USES_FULL_PPGTT(dev))
			continue;

		ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	uint32_t ecochk, gab_ctl, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

	for_each_ring(ring, dev_priv, i) {
		int ret = ppgtt->switch_mm(ppgtt, ring, true);
		if (ret)
			return ret;
	}

	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_atomic(pt_vaddr);
}
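
/* Illustrative note: on gen6/gen7 each page table holds I915_PPGTT_PT_ENTRIES
 * (4096 / 4 = 1024) PTEs, so a (hypothetical) offset of 0x500000 corresponds
 * to GTT page 0x500, i.e. act_pt = 0x500 / 1024 = 1 and act_pte =
 * 0x500 % 1024 = 0x100 in the walkers above.
 */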

static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
}

static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	list_del(&vm->global_link);
	drm_mm_takedown(&ppgtt->base.mm);
	drm_mm_remove_node(&ppgtt->node);

	gen6_ppgtt_unmap_pages(ppgtt);
	gen6_ppgtt_free(ppgtt);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_SEARCH_DEFAULT,
						  DRM_MM_CREATE_DEFAULT);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE, 0);
		if (ret)
			return ret;

		retried = true;
		goto alloc;
	}

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	return ret;
}

static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
				  GFP_KERNEL);

	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i]) {
			gen6_ppgtt_free(ppgtt);
			return -ENOMEM;
		}
	}

	return 0;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int ret;

	ret = gen6_ppgtt_allocate_page_directories(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
	if (ret) {
		drm_mm_remove_node(&ppgtt->node);
		return ret;
	}

	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr) {
		drm_mm_remove_node(&ppgtt->node);
		gen6_ppgtt_free(ppgtt);
		return -ENOMEM;
	}

	return 0;
}

static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	int i;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			gen6_ppgtt_unmap_pages(ppgtt);
			return -EIO;
		}

		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	return 0;
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->enable = gen6_ppgtt_enable;
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->enable = gen7_ppgtt_enable;
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ret = gen6_ppgtt_setup_page_tables(ppgtt);
	if (ret) {
		gen6_ppgtt_free(ppgtt);
		return ret;
	}

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);

	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);

	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	return 0;
}

int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ppgtt->base.dev = dev;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else if (IS_GEN8(dev))
		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
	else
		BUG();

	if (!ret) {
		struct drm_i915_private *dev_priv = dev->dev_private;
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
		if (INTEL_INFO(dev)->gen < 8) {
			gen6_write_pdes(ppgtt);
			DRM_DEBUG("Adding PPGTT at offset %x\n",
				  ppgtt->pd_offset << 10);
		}
	}

	return ret;
}

static void
ppgtt_bind_vma(struct i915_vma *vma,
	       enum i915_cache_level cache_level,
	       u32 flags)
{
	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level);
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}
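
/* Descriptive note: unlike the GGTT paths further below, these PPGTT
 * bind/unbind helpers only rewrite the per-VM page tables via
 * insert_entries()/clear_range(); any global (GGTT) or aliasing mapping is
 * handled separately by ggtt_bind_vma()/ggtt_unbind_vma().
 */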

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
							   &dev_priv->gtt.base);
		if (!vma)
			continue;

		i915_gem_clflush_object(obj, obj->pin_display);
		/* The bind_vma code tries to be smart about tracking mappings.
		 * Unfortunately above, we've just wiped out the mappings
		 * without telling our object about it. So we need to fake it.
		 */
		obj->has_global_gtt_mapping = 0;
		vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
	}


	if (INTEL_INFO(dev)->gen >= 8) {
		gen8_setup_private_ppat(dev_priv);
		return;
	}

	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
		/* TODO: Perhaps it shouldn't be gen6 specific */
		if (i915_is_ggtt(vm)) {
			if (dev_priv->mm.aliasing_ppgtt)
				gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
			continue;
		}

		gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_gtt_pte_t __iomem *gtt_entries =
		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(vm->scratch.addr,
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}


static void i915_ggtt_bind_vma(struct i915_vma *vma,
			       enum i915_cache_level cache_level,
			       u32 unused)
{
	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	BUG_ON(!i915_is_ggtt(vma->vm));
	intel_gtt_insert_sg_entries(vma->obj->pages, entry, flags);
	vma->obj->has_global_gtt_mapping = 1;
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	intel_gtt_clear_range(first_entry, num_entries);
}

static void i915_ggtt_unbind_vma(struct i915_vma *vma)
{
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;

	BUG_ON(!i915_is_ggtt(vma->vm));
	vma->obj->has_global_gtt_mapping = 0;
	intel_gtt_clear_range(first, size);
}

static void ggtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	/* If there is no aliasing PPGTT, or the caller needs a global mapping,
	 * or we have a global mapping already but the cacheability flags have
	 * changed, set the global PTEs.
	 *
	 * If there is an aliasing PPGTT it is anecdotally faster, so use that
	 * instead if none of the above hold true.
	 *
	 * NB: A global mapping should only be needed for special regions like
	 * "gtt mappable", SNB errata, or if specified via special execbuf
	 * flags. At all other times, the GPU will use the aliasing PPGTT.
	 */
	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		if (!obj->has_global_gtt_mapping ||
		    (cache_level != obj->cache_level)) {
			vma->vm->insert_entries(vma->vm, obj->pages,
						vma->node.start,
						cache_level);
			obj->has_global_gtt_mapping = 1;
		}
	}

	if (dev_priv->mm.aliasing_ppgtt &&
	    (!obj->has_aliasing_ppgtt_mapping ||
	     (cache_level != obj->cache_level))) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base,
					    vma->obj->pages,
					    vma->node.start,
					    cache_level);
		vma->obj->has_aliasing_ppgtt_mapping = 1;
	}
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->has_global_gtt_mapping) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     obj->base.size,
				     true);
		obj->has_global_gtt_mapping = 0;
	}

	if (obj->has_aliasing_ppgtt_mapping) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 obj->base.size,
					 true);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

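/* Descriptive note (assumption about the coloring set up by GEM binding): on
 * non-LLC platforms GGTT nodes are colored by cache level, and the callback
 * below keeps a one-page gap between nodes of differing colors so that GPU
 * prefetching past the end of one object cannot reach a neighbouring object
 * with conflicting snoop settings.
 */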
static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}
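
/* Descriptive note: the single scratch page allocated above backs every PTE
 * that does not currently map a real object (see the clear_range()
 * implementations), so stray GPU reads land on a harmless zeroed page instead
 * of faulting.
 */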

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
	return bdw_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
	}

	return ret;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
{
#define GEN8_PPAT_UC		(0<<0)
#define GEN8_PPAT_WC		(1<<0)
#define GEN8_PPAT_WT		(2<<0)
#define GEN8_PPAT_WB		(3<<0)
#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
/* FIXME(BDW): Bspec is completely confused about cache control bits. */
#define GEN8_PPAT_LLC		(1<<2)
#define GEN8_PPAT_LLCELLC	(2<<2)
#define GEN8_PPAT_LLCeLLC	(3<<2)
#define GEN8_PPAT_AGE(x)	(x<<4)
#define GEN8_PPAT(i, x)		((uint64_t) (x) << ((i) * 8))
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
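
	/* Illustrative note: GEN8_PPAT(i, x) packs each 8-bit PAT entry at bit
	 * position i * 8, so e.g. entry 2 (WT | LLCELLC = 0x2 | 0x8 = 0xa)
	 * occupies bits 23:16 of the 64-bit value written below.
	 */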

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen8_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;

	gen8_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;

	return ret;
}
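
/* Illustrative note: *gtt_total above is derived purely from the GTT size in
 * bytes; e.g. a 4MB GTT holds 4MB / 8 bytes per gen8 PTE = 512K entries,
 * i.e. (4MB / 8) << PAGE_SHIFT = 2GB of address space (the gen6 probe below
 * does the same with 4-byte PTEs).
 */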

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{

	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	drm_mm_takedown(&vm->mm);
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

	return 0;
}

static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
					      struct i915_address_space *vm)
{
	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	switch (INTEL_INFO(vm->dev)->gen) {
	case 8:
	case 7:
	case 6:
		if (i915_is_ggtt(vm)) {
			vma->unbind_vma = ggtt_unbind_vma;
			vma->bind_vma = ggtt_bind_vma;
		} else {
			vma->unbind_vma = ppgtt_unbind_vma;
			vma->bind_vma = ppgtt_bind_vma;
		}
		break;
	case 5:
	case 4:
	case 3:
	case 2:
		BUG_ON(!i915_is_ggtt(vm));
		vma->unbind_vma = i915_ggtt_unbind_vma;
		vma->bind_vma = i915_ggtt_bind_vma;
		break;
	default:
		BUG();
	}

	/* Keep GGTT vmas first to make debug easier */
	if (i915_is_ggtt(vm))
		list_add(&vma->vma_link, &obj->vma_list);
	else
		list_add_tail(&vma->vma_link, &obj->vma_list);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm);

	return vma;
}