/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)

/* Cacheability Control is a 4-bit value. The low three bits are stored in
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
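 * For example, HSW_CACHEABILITY_CONTROL(0xb) below expands to
 * ((0xb & 0x7) << 1) | ((0xb & 0x8) << 8) = 0x6 | 0x800 = 0x806,
 * i.e. bits 2:1 plus bit 11 of the PTE.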
 */
#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
					 (((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		WARN_ON(1);
	}

	return pte;
}

#define BYT_PTE_WRITEABLE		(1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
				      enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE0;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE0;
		break;
	}

	return pte;
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static int gen6_ppgtt_enable(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i;

	BUG_ON(ppgtt->pd_offset & 0x3f);

	gen6_write_pdes(ppgtt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;
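
		/* Gen6-specific PPGTT setup: set ECOBITS_SNB_BIT and the
		 * 64B PPGTT cacheline fetch bits in GAC_ECO_BITS/GAM_ECOCHK
		 * and "continue after pagefault" in GAB_CTL, then switch
		 * PPGTT on globally via GFX_MODE. Gen7+ instead enables
		 * PPGTT per ring in the loop below. */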

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
					 ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		uint32_t ecochk, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		ecochk = I915_READ(GAM_ECOCHK);
		if (IS_HASWELL(dev)) {
			ecochk |= ECOCHK_PPGTT_WB_HSW;
		} else {
			ecochk |= ECOCHK_PPGTT_LLC_IVB;
			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
		}
		I915_WRITE(GAM_ECOCHK, ecochk);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
	return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   unsigned first_entry,
				   unsigned num_entries)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		dma_addr_t page_addr;

		page_addr = sg_page_iter_dma_address(&sg_iter);
		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			act_pt++;
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
			act_pte = 0;
		}
	}
	kunmap_atomic(pt_vaddr);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i;

	drm_mm_takedown(&ppgtt->base.mm);

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->base.dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now. */
	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
	ppgtt->enable = gen6_ppgtt_enable;
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t) *ppgtt->num_pd_entries,
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr)
		goto err_pt_alloc;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			ret = -EIO;
			goto err_pd_pin;
		}
		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	ppgtt->base.clear_range(&ppgtt->base, 0,
				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);

	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

	return 0;

err_pd_pin:
	if (ppgtt->pt_dma_addr) {
		for (i--; i >= 0; i--)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);

	return ret;
}

static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return -ENOMEM;

	ppgtt->base.dev = dev;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else
		BUG();

	if (ret)
		kfree(ppgtt);
	else {
		dev_priv->mm.aliasing_ppgtt = ppgtt;
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
	}

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (!ppgtt)
		return;

	ppgtt->base.cleanup(&ppgtt->base);
	dev_priv->mm.aliasing_ppgtt = NULL;
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				   cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	ppgtt->base.clear_range(&ppgtt->base,
				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
				obj->base.size >> PAGE_SHIFT);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start / PAGE_SIZE,
				       dev_priv->gtt.base.total / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		i915_gem_clflush_object(obj, obj->pin_display);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1]) !=
			vm->pte_encode(addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     unsigned int pg_start,
				     enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(st, pg_start, flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	intel_gtt_clear_range(first_entry, num_entries);
}

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
					  entry,
					  cache_level);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       entry,
				       obj->base.size >> PAGE_SHIFT);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}
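
/* On non-LLC platforms, objects of different cache levels ("colors") are kept
 * a scratch page apart in the GTT: the range passed in is trimmed by a page at
 * either end that neighbours a node of a different color. Hooked up as the
 * drm_mm color_adjust callback in i915_gem_setup_global_gtt() below when
 * !HAS_LLC(dev).
 */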
static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture.  One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
		int ret;
		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret)
			DRM_DEBUG_KMS("Reservation failed\n");
		obj->has_global_gtt_mapping = 1;
		list_add(&vma->vma_link, &obj->vma_list);
	}

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1);
}

static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		int ret;

		if (INTEL_INFO(dev)->gen <= 7) {
			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
			 * aperture accordingly when using aliasing ppgtt.
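			 * (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE = 512 * 4096
			 * bytes, i.e. 2MB of GTT address space.)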
			 */
			gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
		}

		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (!ret)
			return;

		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
		drm_mm_takedown(&dev_priv->gtt.base.mm);
		gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
	}
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.base.scratch.page = page;
	dev_priv->gtt.base.scratch.addr = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page = dev_priv->gtt.base.scratch.page;

	set_pages_wb(page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(page);
	__free_page(page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_bus_addr;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret)
		DRM_ERROR("Scratch setup failed\n");

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
	iounmap(gtt->gsm);
	teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	}

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	gtt->base.dev = dev;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

	return 0;
}