/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
#define HSW_GTT_ADDR_ENCODE(addr)  ((addr) | (((addr) >> 28) & 0x7f0))

#define GEN6_PDE_VALID (1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID        (1 << 0)
#define GEN6_PTE_UNCACHED     (1 << 1)
#define HSW_PTE_UNCACHED      (0)
#define GEN6_PTE_CACHE_LLC    (2 << 1)
#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr)  HSW_GTT_ADDR_ENCODE(addr)

/* Cacheability Control is a 4-bit value. The low three bits are stored in
 * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
 */
#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
                                        (((bits) & 0x8) << (11 - 3)))
#define HSW_WB_LLC_AGE3      HSW_CACHEABILITY_CONTROL(0x2)
#define HSW_WB_LLC_AGE0      HSW_CACHEABILITY_CONTROL(0x3)
#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)

static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
                                     enum i915_cache_level level,
                                     bool valid)
{
        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_L3_LLC:
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                WARN_ON(1);
        }

        return pte;
}

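/* For illustration of the address encoding used by snb_pte_encode() above and
 * the other encoders below (the numbers here are illustrative, not from the
 * original source): with 4 KiB pages a 40-bit physical address is packed into
 * a 32-bit PTE. Address bits 31:12 land in PTE bits 31:12 directly, while
 * GEN6_GTT_ADDR_ENCODE() folds address bits 39:32 into PTE bits 11:4, e.g.
 *
 *      addr = 0x123456000 (bit 32 set)
 *      (addr >> 28) & 0xff0 = 0x010
 *      pte  = 0x23456010 | GEN6_PTE_VALID | cache bits
 *
 * Haswell uses the same scheme with the narrower 0x7f0 mask plus the
 * cacheability-control bits defined above.
 */
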
static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
                                     enum i915_cache_level level,
                                     bool valid)
{
        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_L3_LLC:
                pte |= GEN7_PTE_CACHE_L3_LLC;
                break;
        case I915_CACHE_LLC:
                pte |= GEN6_PTE_CACHE_LLC;
                break;
        case I915_CACHE_NONE:
                pte |= GEN6_PTE_UNCACHED;
                break;
        default:
                WARN_ON(1);
        }

        return pte;
}

#define BYT_PTE_WRITEABLE             (1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)

static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
                                     enum i915_cache_level level,
                                     bool valid)
{
        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
        pte |= GEN6_PTE_ADDR_ENCODE(addr);

        /* Mark the page as writeable. Other platforms don't have a
         * setting for read-only/writable, so this matches that behavior.
         */
        pte |= BYT_PTE_WRITEABLE;

        if (level != I915_CACHE_NONE)
                pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

        return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
                                     enum i915_cache_level level,
                                     bool valid)
{
        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
        pte |= HSW_PTE_ADDR_ENCODE(addr);

        if (level != I915_CACHE_NONE)
                pte |= HSW_WB_LLC_AGE3;

        return pte;
}

static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
                                      enum i915_cache_level level,
                                      bool valid)
{
        gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
        pte |= HSW_PTE_ADDR_ENCODE(addr);

        switch (level) {
        case I915_CACHE_NONE:
                break;
        case I915_CACHE_WT:
                pte |= HSW_WT_ELLC_LLC_AGE0;
                break;
        default:
                pte |= HSW_WB_ELLC_LLC_AGE0;
                break;
        }

        return pte;
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
        struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
        gen6_gtt_pte_t __iomem *pd_addr;
        uint32_t pd_entry;
        int i;

        WARN_ON(ppgtt->pd_offset & 0x3f);
        pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
                ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                dma_addr_t pt_addr;

                pt_addr = ppgtt->pt_dma_addr[i];
                pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
                pd_entry |= GEN6_PDE_VALID;

                writel(pd_entry, pd_addr + i);
        }
        readl(pd_addr);
}

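/* A note on the geometry handled here (derived from the macros above, not an
 * original comment): with 4 KiB pages each page table holds
 * I915_PPGTT_PT_ENTRIES = 4096 / 4 = 1024 PTEs and the page directory has
 * GEN6_PPGTT_PD_ENTRIES = 512 PDEs, so the full gen6 PPGTT spans
 * 512 * 1024 * 4 KiB = 2 GiB, which is what PP_DIR_DCLV_2G below enables.
 * The PDEs themselves live inside the global GTT's page table (written
 * through dev_priv->gtt.gsm by gen6_write_pdes() above), and the hardware is
 * told where they start with a cacheline-granular offset (pd_offset / 64,
 * shifted into bits 31:16 of RING_PP_DIR_BASE).
 */
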
static int gen6_ppgtt_enable(struct drm_device *dev)
{
        drm_i915_private_t *dev_priv = dev->dev_private;
        uint32_t pd_offset;
        struct intel_ring_buffer *ring;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
        int i;

        BUG_ON(ppgtt->pd_offset & 0x3f);

        gen6_write_pdes(ppgtt);

        pd_offset = ppgtt->pd_offset;
        pd_offset /= 64; /* in cachelines */
        pd_offset <<= 16;

        if (INTEL_INFO(dev)->gen == 6) {
                uint32_t ecochk, gab_ctl, ecobits;

                ecobits = I915_READ(GAC_ECO_BITS);
                I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
                                         ECOBITS_PPGTT_CACHE64B);

                gab_ctl = I915_READ(GAB_CTL);
                I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

                ecochk = I915_READ(GAM_ECOCHK);
                I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
                                       ECOCHK_PPGTT_CACHE64B);
                I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
        } else if (INTEL_INFO(dev)->gen >= 7) {
                uint32_t ecochk, ecobits;

                ecobits = I915_READ(GAC_ECO_BITS);
                I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

                ecochk = I915_READ(GAM_ECOCHK);
                if (IS_HASWELL(dev)) {
                        ecochk |= ECOCHK_PPGTT_WB_HSW;
                } else {
                        ecochk |= ECOCHK_PPGTT_LLC_IVB;
                        ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
                }
                I915_WRITE(GAM_ECOCHK, ecochk);
                /* GFX_MODE is per-ring on gen7+ */
        }

        for_each_ring(ring, dev_priv, i) {
                if (INTEL_INFO(dev)->gen >= 7)
                        I915_WRITE(RING_MODE_GEN7(ring),
                                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

                I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
                I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
        }
        return 0;
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                                   unsigned first_entry,
                                   unsigned num_entries,
                                   bool use_scratch)
{
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen6_gtt_pte_t *pt_vaddr, scratch_pte;
        unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        unsigned last_pte, i;

        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);

        while (num_entries) {
                last_pte = first_pte + num_entries;
                if (last_pte > I915_PPGTT_PT_ENTRIES)
                        last_pte = I915_PPGTT_PT_ENTRIES;

                pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

                for (i = first_pte; i < last_pte; i++)
                        pt_vaddr[i] = scratch_pte;

                kunmap_atomic(pt_vaddr);

                num_entries -= last_pte - first_pte;
                first_pte = 0;
                act_pt++;
        }
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                                      struct sg_table *pages,
                                      unsigned first_entry,
                                      enum i915_cache_level cache_level)
{
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        gen6_gtt_pte_t *pt_vaddr;
        unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
        unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
        struct sg_page_iter sg_iter;

        pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
        for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
                dma_addr_t page_addr;

                page_addr = sg_page_iter_dma_address(&sg_iter);
                pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true);
                if (++act_pte == I915_PPGTT_PT_ENTRIES) {
                        kunmap_atomic(pt_vaddr);
                        act_pt++;
                        pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
                        act_pte = 0;
                }
        }
        kunmap_atomic(pt_vaddr);
}

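/* Worked example of the indexing used by gen6_ppgtt_clear_range() and
 * gen6_ppgtt_insert_entries() above (values are illustrative only): with 1024
 * PTEs per page table, first_entry = 2500 selects page table
 * act_pt = 2500 / 1024 = 2 and slot act_pte = 2500 % 1024 = 452; both helpers
 * then walk forward from there, hopping to the next page table whenever the
 * slot index wraps.
 */
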
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
        struct i915_hw_ppgtt *ppgtt =
                container_of(vm, struct i915_hw_ppgtt, base);
        int i;

        drm_mm_takedown(&ppgtt->base.mm);

        if (ppgtt->pt_dma_addr) {
                for (i = 0; i < ppgtt->num_pd_entries; i++)
                        pci_unmap_page(ppgtt->base.dev->pdev,
                                       ppgtt->pt_dma_addr[i],
                                       4096, PCI_DMA_BIDIRECTIONAL);
        }

        kfree(ppgtt->pt_dma_addr);
        for (i = 0; i < ppgtt->num_pd_entries; i++)
                __free_page(ppgtt->pt_pages[i]);
        kfree(ppgtt->pt_pages);
        kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
        struct drm_device *dev = ppgtt->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        unsigned first_pd_entry_in_global_pt;
        int i;
        int ret = -ENOMEM;

        /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
         * entries. For aliasing ppgtt support we just steal them at the end for
         * now.
         */
        first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

        ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
        ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
        ppgtt->enable = gen6_ppgtt_enable;
        ppgtt->base.clear_range = gen6_ppgtt_clear_range;
        ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
        ppgtt->base.cleanup = gen6_ppgtt_cleanup;
        ppgtt->base.scratch = dev_priv->gtt.base.scratch;
        ppgtt->pt_pages = kzalloc(sizeof(struct page *) * ppgtt->num_pd_entries,
                                  GFP_KERNEL);
        if (!ppgtt->pt_pages)
                return -ENOMEM;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
                if (!ppgtt->pt_pages[i])
                        goto err_pt_alloc;
        }

        ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t) * ppgtt->num_pd_entries,
                                     GFP_KERNEL);
        if (!ppgtt->pt_dma_addr)
                goto err_pt_alloc;

        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                dma_addr_t pt_addr;

                pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
                                       PCI_DMA_BIDIRECTIONAL);

                if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
                        ret = -EIO;
                        goto err_pd_pin;
                }
                ppgtt->pt_dma_addr[i] = pt_addr;
        }

        ppgtt->base.clear_range(&ppgtt->base, 0,
                                ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);

        ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

        return 0;

err_pd_pin:
        if (ppgtt->pt_dma_addr) {
                for (i--; i >= 0; i--)
                        pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
                                       4096, PCI_DMA_BIDIRECTIONAL);
        }
err_pt_alloc:
        kfree(ppgtt->pt_dma_addr);
        for (i = 0; i < ppgtt->num_pd_entries; i++) {
                if (ppgtt->pt_pages[i])
                        __free_page(ppgtt->pt_pages[i]);
        }
        kfree(ppgtt->pt_pages);

        return ret;
}

static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_hw_ppgtt *ppgtt;
        int ret;

        ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
        if (!ppgtt)
                return -ENOMEM;

        ppgtt->base.dev = dev;

        if (INTEL_INFO(dev)->gen < 8)
                ret = gen6_ppgtt_init(ppgtt);
        else
                BUG();

        if (ret)
                kfree(ppgtt);
        else {
                dev_priv->mm.aliasing_ppgtt = ppgtt;
                drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
                            ppgtt->base.total);
        }

        return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

        if (!ppgtt)
                return;

        ppgtt->base.cleanup(&ppgtt->base);
        dev_priv->mm.aliasing_ppgtt = NULL;
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
                            struct drm_i915_gem_object *obj,
                            enum i915_cache_level cache_level)
{
        ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
                                   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
                                   cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
                              struct drm_i915_gem_object *obj)
{
        ppgtt->base.clear_range(&ppgtt->base,
                                i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
                                obj->base.size >> PAGE_SHIFT,
                                true);
}

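/* Note (an observation on the two helpers above, not an original comment):
 * the aliasing PPGTT binds and unbinds objects at the page offset returned by
 * i915_gem_obj_ggtt_offset(), so every object sits at the same address in the
 * per-process space as it does in the global GTT; "aliasing" refers to exactly
 * this mirroring.
 */
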
extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
        /* Query intel_iommu to see if we need the workaround. Presumably that
         * was loaded first.
         */
        if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
                return true;
#endif
        return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
        bool ret = dev_priv->mm.interruptible;

        if (unlikely(dev_priv->gtt.do_idle_maps)) {
                dev_priv->mm.interruptible = false;
                if (i915_gpu_idle(dev_priv->dev)) {
                        DRM_ERROR("Couldn't idle GPU\n");
                        /* Wait a bit, in hopes it avoids the hang */
                        udelay(10);
                }
        }

        return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
        if (unlikely(dev_priv->gtt.do_idle_maps))
                dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ring_buffer *ring;
        int i;

        if (INTEL_INFO(dev)->gen < 6)
                return;

        for_each_ring(ring, dev_priv, i) {
                u32 fault_reg;
                fault_reg = I915_READ(RING_FAULT_REG(ring));
                if (fault_reg & RING_FAULT_VALID) {
                        DRM_DEBUG_DRIVER("Unexpected fault\n"
                                         "\tAddr: 0x%08lx\n"
                                         "\tAddress space: %s\n"
                                         "\tSource ID: %d\n"
                                         "\tType: %d\n",
                                         fault_reg & PAGE_MASK,
                                         fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
                                         RING_FAULT_SRCID(fault_reg),
                                         RING_FAULT_FAULT_TYPE(fault_reg));
                        I915_WRITE(RING_FAULT_REG(ring),
                                   fault_reg & ~RING_FAULT_VALID);
                }
        }
        POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* Don't bother messing with faults pre GEN6 as we have little
         * documentation supporting that it's a good idea.
         */
        if (INTEL_INFO(dev)->gen < 6)
                return;

        i915_check_and_clear_faults(dev);

        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
                                       dev_priv->gtt.base.start / PAGE_SIZE,
                                       dev_priv->gtt.base.total / PAGE_SIZE,
                                       false);
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj;

        i915_check_and_clear_faults(dev);

        /* First fill our portion of the GTT with scratch pages */
        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
                                       dev_priv->gtt.base.start / PAGE_SIZE,
                                       dev_priv->gtt.base.total / PAGE_SIZE,
                                       true);

        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
                i915_gem_clflush_object(obj, obj->pin_display);
                i915_gem_gtt_bind_object(obj, obj->cache_level);
        }

        i915_gem_chipset_flush(dev);
}

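/* A remark on the suspend/restore pair above (not from the original source):
 * suspend clears the range with use_scratch == false, so the GGTT PTEs are
 * rewritten without the valid bit, while restore first scrubs the whole range
 * with valid PTEs pointing at the scratch page (use_scratch == true) and then
 * rebinds every object on the bound list before flushing the chipset.
 */
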
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
        if (obj->has_dma_mapping)
                return 0;

        if (!dma_map_sg(&obj->base.dev->pdev->dev,
                        obj->pages->sgl, obj->pages->nents,
                        PCI_DMA_BIDIRECTIONAL))
                return -ENOSPC;

        return 0;
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct sg_table *st,
                                     unsigned int first_entry,
                                     enum i915_cache_level level)
{
        struct drm_i915_private *dev_priv = vm->dev->dev_private;
        gen6_gtt_pte_t __iomem *gtt_entries =
                (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
        int i = 0;
        struct sg_page_iter sg_iter;
        dma_addr_t addr;

        for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
                addr = sg_page_iter_dma_address(&sg_iter);
                iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
                i++;
        }

        /* XXX: This serves as a posting read to make sure that the PTE has
         * actually been updated. There is some concern that even though
         * registers and PTEs are within the same BAR, they may be subject to
         * different (NUMA-like) access patterns. Therefore, even with the way
         * we assume hardware should work, we must keep this posting read for
         * paranoia.
         */
        if (i != 0)
                WARN_ON(readl(&gtt_entries[i-1]) !=
                        vm->pte_encode(addr, level, true));

        /* This next bit makes the above posting read even more important. We
         * want to flush the TLBs only after we're certain all the PTE updates
         * have finished.
         */
        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
        POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
                                  unsigned int first_entry,
                                  unsigned int num_entries,
                                  bool use_scratch)
{
        struct drm_i915_private *dev_priv = vm->dev->dev_private;
        gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
                (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
        const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
        int i;

        if (WARN(num_entries > max_entries,
                 "First entry = %d; Num entries = %d (max=%d)\n",
                 first_entry, num_entries, max_entries))
                num_entries = max_entries;

        scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);

        for (i = 0; i < num_entries; i++)
                iowrite32(scratch_pte, &gtt_base[i]);
        readl(gtt_base);
}

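/* The two helpers above write gen6+ GGTT PTEs directly through the
 * write-combined mapping of the GTT page table (dev_priv->gtt.gsm). The pair
 * below instead routes through the legacy intel-gtt layer
 * (intel_gtt_insert_sg_entries() / intel_gtt_clear_range());
 * i915_gem_gtt_init() selects that path for gen5 and older via
 * i915_gmch_probe().
 */
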
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
                                     struct sg_table *st,
                                     unsigned int pg_start,
                                     enum i915_cache_level cache_level)
{
        unsigned int flags = (cache_level == I915_CACHE_NONE) ?
                AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

        intel_gtt_insert_sg_entries(st, pg_start, flags);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
                                  unsigned int first_entry,
                                  unsigned int num_entries,
                                  bool unused)
{
        intel_gtt_clear_range(first_entry, num_entries);
}

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
                              enum i915_cache_level cache_level)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

        dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
                                          entry,
                                          cache_level);

        obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;

        dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
                                       entry,
                                       obj->base.size >> PAGE_SHIFT,
                                       true);

        obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        bool interruptible;

        interruptible = do_idling(dev_priv);

        if (!obj->has_dma_mapping)
                dma_unmap_sg(&dev->pdev->dev,
                             obj->pages->sgl, obj->pages->nents,
                             PCI_DMA_BIDIRECTIONAL);

        undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
                                  unsigned long color,
                                  unsigned long *start,
                                  unsigned long *end)
{
        if (node->color != color)
                *start += 4096;

        if (!list_empty(&node->node_list)) {
                node = list_entry(node->node_list.next,
                                  struct drm_mm_node,
                                  node_list);
                if (node->allocated && node->color != color)
                        *end -= 4096;
        }
}

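/* How the color adjust above is used (an explanatory note, not an original
 * comment): elsewhere in the driver an object's cache level is passed in as
 * the drm_mm "color", and whenever a hole borders a node of a different color
 * the usable range is shrunk by one 4 KiB page on that side.
 * i915_gem_setup_global_gtt() installs this hook only on machines without an
 * LLC, presumably to keep objects with different snoop attributes from ever
 * sitting in adjacent GTT pages.
 */
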
void i915_gem_setup_global_gtt(struct drm_device *dev,
                               unsigned long start,
                               unsigned long mappable_end,
                               unsigned long end)
{
        /* Let GEM Manage all of the aperture.
         *
         * However, leave one page at the end still bound to the scratch page.
         * There are a number of places where the hardware apparently prefetches
         * past the end of the object, and we've seen multiple hangs with the
         * GPU head pointer stuck in a batchbuffer bound at the last page of the
         * aperture. One page should be enough to keep any prefetching inside
         * of the aperture.
         */
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
        struct drm_mm_node *entry;
        struct drm_i915_gem_object *obj;
        unsigned long hole_start, hole_end;

        BUG_ON(mappable_end > end);

        /* Subtract the guard page ... */
        drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
        if (!HAS_LLC(dev))
                dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

        /* Mark any preallocated objects as occupied */
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
                struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
                int ret;
                DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
                              i915_gem_obj_ggtt_offset(obj), obj->base.size);

                WARN_ON(i915_gem_obj_ggtt_bound(obj));
                ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
                if (ret)
                        DRM_DEBUG_KMS("Reservation failed\n");
                obj->has_global_gtt_mapping = 1;
                list_add(&vma->vma_link, &obj->vma_list);
        }

        dev_priv->gtt.base.start = start;
        dev_priv->gtt.base.total = end - start;

        /* Clear any non-preallocated blocks */
        drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
                const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
                DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
                              hole_start, hole_end);
                ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
        }

        /* And finally clear the reserved guard page */
        ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
}

static bool
intel_enable_ppgtt(struct drm_device *dev)
{
        if (i915_enable_ppgtt >= 0)
                return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
        /* Disable ppgtt on SNB if VT-d is on. */
        if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
                return false;
#endif

        return true;
}

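/* Sizing note for the function below (derived from the code, not an original
 * comment): when the aliasing PPGTT is used on gen6/7 the range handed to
 * i915_gem_setup_global_gtt() is shrunk by
 * GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE = 512 * 4 KiB = 2 MiB, so the topmost 512
 * GGTT PTE slots stay free to hold the PPGTT page directory that
 * gen6_ppgtt_init() places there.
 */
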
void i915_gem_init_global_gtt(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        unsigned long gtt_size, mappable_size;

        gtt_size = dev_priv->gtt.base.total;
        mappable_size = dev_priv->gtt.mappable_end;

        if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
                int ret;

                if (INTEL_INFO(dev)->gen <= 7) {
                        /* PPGTT pdes are stolen from global gtt ptes, so shrink the
                         * aperture accordingly when using aliasing ppgtt.
                         */
                        gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
                }

                i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

                ret = i915_gem_init_aliasing_ppgtt(dev);
                if (!ret)
                        return;

                DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
                drm_mm_takedown(&dev_priv->gtt.base.mm);
                gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
        }
        i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct page *page;
        dma_addr_t dma_addr;

        page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
        if (page == NULL)
                return -ENOMEM;
        get_page(page);
        set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
        dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
                                PCI_DMA_BIDIRECTIONAL);
        if (pci_dma_mapping_error(dev->pdev, dma_addr))
                return -EINVAL;
#else
        dma_addr = page_to_phys(page);
#endif
        dev_priv->gtt.base.scratch.page = page;
        dev_priv->gtt.base.scratch.addr = dma_addr;

        return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct page *page = dev_priv->gtt.base.scratch.page;

        set_pages_wb(page, 1);
        pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
                       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
        put_page(page);
        __free_page(page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
        snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
        return snb_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
        snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
        snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
        return snb_gmch_ctl << 25; /* 32 MB units */
}

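/* Decoding example for the two helpers above (values are illustrative, not
 * from the original source): a GGMS field of 2 yields a 2 MiB GTT page table;
 * at 4 bytes per gen6 PTE that is 524288 entries, i.e. 2 GiB of mappable
 * address space, which matches how gen6_gmch_probe() computes gtt_total
 * below. Likewise a GMS field of 2 means 2 * 32 MiB = 64 MiB of stolen memory.
 */
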
static int gen6_gmch_probe(struct drm_device *dev,
                           size_t *gtt_total,
                           size_t *stolen,
                           phys_addr_t *mappable_base,
                           unsigned long *mappable_end)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        phys_addr_t gtt_bus_addr;
        unsigned int gtt_size;
        u16 snb_gmch_ctl;
        int ret;

        *mappable_base = pci_resource_start(dev->pdev, 2);
        *mappable_end = pci_resource_len(dev->pdev, 2);

        /* 64/512MB is the current min/max we actually know of, but this is just
         * a coarse sanity check.
         */
        if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
                DRM_ERROR("Unknown GMADR size (%lx)\n",
                          dev_priv->gtt.mappable_end);
                return -ENXIO;
        }

        if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
                pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
        pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
        gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);

        *stolen = gen6_get_stolen_size(snb_gmch_ctl);
        *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

        /* For Modern GENs the PTEs and register space are split in the BAR */
        gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
                (pci_resource_len(dev->pdev, 0) / 2);

        dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
        if (!dev_priv->gtt.gsm) {
                DRM_ERROR("Failed to map the gtt page table\n");
                return -ENOMEM;
        }

        ret = setup_scratch_page(dev);
        if (ret)
                DRM_ERROR("Scratch setup failed\n");

        dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
        dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;

        return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
        struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

        iounmap(gtt->gsm);
        teardown_scratch_page(vm->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
                           size_t *gtt_total,
                           size_t *stolen,
                           phys_addr_t *mappable_base,
                           unsigned long *mappable_end)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret;

        ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
        if (!ret) {
                DRM_ERROR("failed to set up gmch\n");
                return -EIO;
        }

        intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

        dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
        dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
        dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;

        return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
        intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct i915_gtt *gtt = &dev_priv->gtt;
        int ret;

        if (INTEL_INFO(dev)->gen <= 5) {
                gtt->gtt_probe = i915_gmch_probe;
                gtt->base.cleanup = i915_gmch_remove;
        } else {
                gtt->gtt_probe = gen6_gmch_probe;
                gtt->base.cleanup = gen6_gmch_remove;
                if (IS_HASWELL(dev) && dev_priv->ellc_size)
                        gtt->base.pte_encode = iris_pte_encode;
                else if (IS_HASWELL(dev))
                        gtt->base.pte_encode = hsw_pte_encode;
                else if (IS_VALLEYVIEW(dev))
                        gtt->base.pte_encode = byt_pte_encode;
                else if (INTEL_INFO(dev)->gen >= 7)
                        gtt->base.pte_encode = ivb_pte_encode;
                else
                        gtt->base.pte_encode = snb_pte_encode;
        }

        ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
                             &gtt->mappable_base, &gtt->mappable_end);
        if (ret)
                return ret;

        gtt->base.dev = dev;

        /* GMADR is the PCI mmio aperture into the global GTT. */
        DRM_INFO("Memory usable by graphics device = %zdM\n",
                 gtt->base.total >> 20);
        DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
        DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);

        return 0;
}