/*
 * Copyright © 2010 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"

/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))

#define GEN6_PDE_VALID			(1 << 0)
/* gen6+ has bit 11-4 for physical addr bit 39-32 */
#define GEN6_PDE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

#define GEN6_PTE_VALID			(1 << 0)
#define GEN6_PTE_UNCACHED		(1 << 1)
#define HSW_PTE_UNCACHED		(0)
#define GEN6_PTE_CACHE_LLC		(2 << 1)
#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
#define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)

static gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
				      dma_addr_t addr,
				      enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_LLC_MLC:
		pte |= GEN6_PTE_CACHE_LLC_MLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		BUG();
	}

	return pte;
}
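
/*
 * Illustrative note (added for clarity, not part of the original file):
 * GEN6_GTT_ADDR_ENCODE() assumes a page-aligned DMA address, so PTE bits
 * 11:0 are free for flags. Address bits 31:12 stay in place and bits 39:32
 * are folded down into PTE bits 11:4 by the ">> 28" shift. For a
 * hypothetical page at 0x1_2345_6000, (addr >> 28) & 0xff0 is 0x010, so the
 * 32-bit PTE holds 0x23456010 before the valid and cacheability bits are
 * ORed in by the encoders above and below.
 */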

#define BYT_PTE_WRITEABLE		(1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)

static gen6_gtt_pte_t byt_pte_encode(struct drm_device *dev,
				     dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	/* Mark the page as writeable. Other platforms don't have a
	 * setting for read-only/writable, so this matches that behavior.
	 */
	pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_gtt_pte_t hsw_pte_encode(struct drm_device *dev,
				     dma_addr_t addr,
				     enum i915_cache_level level)
{
	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= GEN6_PTE_CACHE_LLC;

	return pte;
}

static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
	gen6_gtt_pte_t __iomem *pd_addr;
	uint32_t pd_entry;
	int i;

	WARN_ON(ppgtt->pd_offset & 0x3f);
	pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
		ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = ppgtt->pt_dma_addr[i];
		pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
		pd_entry |= GEN6_PDE_VALID;

		writel(pd_entry, pd_addr + i);
	}
	readl(pd_addr);
}

static int gen6_ppgtt_enable(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
	int i;

	BUG_ON(ppgtt->pd_offset & 0x3f);

	gen6_write_pdes(ppgtt);

	pd_offset = ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk, gab_ctl, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
			   ECOBITS_PPGTT_CACHE64B);

		gab_ctl = I915_READ(GAB_CTL);
		I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

		ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
			   ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		uint32_t ecochk, ecobits;

		ecobits = I915_READ(GAC_ECO_BITS);
		I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

		ecochk = I915_READ(GAM_ECOCHK);
		if (IS_HASWELL(dev)) {
			ecochk |= ECOCHK_PPGTT_WB_HSW;
		} else {
			ecochk |= ECOCHK_PPGTT_LLC_IVB;
			ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
		}
		I915_WRITE(GAM_ECOCHK, ecochk);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for_each_ring(ring, dev_priv, i) {
		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
	return 0;
}
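
/*
 * Illustrative note (added for clarity, not part of the original file):
 * as programmed in gen6_ppgtt_enable() above, RING_PP_DIR_BASE takes the
 * page directory offset within the GTT expressed in 64-byte cachelines and
 * placed in the upper 16 bits. A hypothetical pd_offset of 0x40000 bytes
 * would therefore be written as (0x40000 / 64) << 16 = 0x10000000.
 */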

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
				   unsigned first_entry,
				   unsigned num_entries)
{
	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	unsigned last_pte, i;

	scratch_pte = ppgtt->pte_encode(ppgtt->dev,
					ppgtt->scratch_page_dma_addr,
					I915_CACHE_LLC);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > I915_PPGTT_PT_ENTRIES)
			last_pte = I915_PPGTT_PT_ENTRIES;

		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_atomic(pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
				      struct sg_table *pages,
				      unsigned first_entry,
				      enum i915_cache_level cache_level)
{
	gen6_gtt_pte_t *pt_vaddr;
	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
	struct sg_page_iter sg_iter;

	pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		dma_addr_t page_addr;

		page_addr = sg_page_iter_dma_address(&sg_iter);
		pt_vaddr[act_pte] = ppgtt->pte_encode(ppgtt->dev, page_addr,
						      cache_level);
		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
			kunmap_atomic(pt_vaddr);
			act_pt++;
			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
			act_pte = 0;
		}
	}
	kunmap_atomic(pt_vaddr);
}
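
/*
 * Illustrative note (added for clarity, not part of the original file):
 * the two helpers above split a linear PTE index into a page table number
 * (act_pt) and a slot within that table. Assuming I915_PPGTT_PT_ENTRIES is
 * 1024 (a 4KiB table of 32-bit PTEs), entry 2500 lands in table
 * 2500 / 1024 = 2 at slot 2500 % 1024 = 452; ranges that cross a table
 * boundary are handled one kmap_atomic() mapping at a time.
 */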

static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	if (ppgtt->pt_dma_addr) {
		for (i = 0; i < ppgtt->num_pd_entries; i++)
			pci_unmap_page(ppgtt->dev->pdev,
				       ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}

	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++)
		__free_page(ppgtt->pt_pages[i]);
	kfree(ppgtt->pt_pages);
	kfree(ppgtt);
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned first_pd_entry_in_global_pt;
	int i;
	int ret = -ENOMEM;

	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
	 * entries. For aliasing ppgtt support we just steal them at the end for
	 * now. */
	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);

	if (IS_HASWELL(dev)) {
		ppgtt->pte_encode = hsw_pte_encode;
	} else if (IS_VALLEYVIEW(dev)) {
		ppgtt->pte_encode = byt_pte_encode;
	} else {
		ppgtt->pte_encode = gen6_pte_encode;
	}
	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
	ppgtt->enable = gen6_ppgtt_enable;
	ppgtt->clear_range = gen6_ppgtt_clear_range;
	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->cleanup = gen6_ppgtt_cleanup;
	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
				  GFP_KERNEL);
	if (!ppgtt->pt_pages)
		return -ENOMEM;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
		if (!ppgtt->pt_pages[i])
			goto err_pt_alloc;
	}

	ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t) * ppgtt->num_pd_entries,
				     GFP_KERNEL);
	if (!ppgtt->pt_dma_addr)
		goto err_pt_alloc;

	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		dma_addr_t pt_addr;

		pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
				       PCI_DMA_BIDIRECTIONAL);

		if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
			ret = -EIO;
			goto err_pd_pin;
		}
		ppgtt->pt_dma_addr[i] = pt_addr;
	}

	ppgtt->clear_range(ppgtt, 0,
			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);

	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);

	return 0;

err_pd_pin:
	if (ppgtt->pt_dma_addr) {
		for (i--; i >= 0; i--)
			pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
				       4096, PCI_DMA_BIDIRECTIONAL);
	}
err_pt_alloc:
	kfree(ppgtt->pt_dma_addr);
	for (i = 0; i < ppgtt->num_pd_entries; i++) {
		if (ppgtt->pt_pages[i])
			__free_page(ppgtt->pt_pages[i]);
	}
	kfree(ppgtt->pt_pages);

	return ret;
}
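
/*
 * Illustrative note (added for clarity, not part of the original file):
 * the page directory is not separate memory -- its entries live in PTE
 * slots stolen from the tail of the global GTT, so pd_offset is just that
 * first PTE index times sizeof(gen6_gtt_pte_t) (4 bytes). A hypothetical
 * first_pd_entry_in_global_pt of 523776 gives pd_offset = 523776 * 4 =
 * 0x1ff800, which also satisfies the 64-byte alignment checked by
 * gen6_write_pdes() and gen6_ppgtt_enable().
 */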

static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return -ENOMEM;

	ppgtt->dev = dev;
	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;

	if (INTEL_INFO(dev)->gen < 8)
		ret = gen6_ppgtt_init(ppgtt);
	else
		BUG();

	if (ret)
		kfree(ppgtt);
	else
		dev_priv->mm.aliasing_ppgtt = ppgtt;

	return ret;
}

void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (!ppgtt)
		return;

	ppgtt->cleanup(ppgtt);
	dev_priv->mm.aliasing_ppgtt = NULL;
}

void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
			    struct drm_i915_gem_object *obj,
			    enum i915_cache_level cache_level)
{
	ppgtt->insert_entries(ppgtt, obj->pages,
			      obj->gtt_space->start >> PAGE_SHIFT,
			      cache_level);
}

void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_object *obj)
{
	ppgtt->clear_range(ppgtt,
			   obj->gtt_space->start >> PAGE_SHIFT,
			   obj->base.size >> PAGE_SHIFT);
}
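
/*
 * Illustrative note (added for clarity, not part of the original file):
 * the bind/unbind helpers above reuse obj->gtt_space->start, i.e. the
 * object's offset in the global GTT, as the PPGTT offset. The per-process
 * address space therefore mirrors ("aliases") the global GTT layout
 * one-to-one instead of doing its own address allocation.
 */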

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static inline bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
				      dev_priv->gtt.total / PAGE_SIZE);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		i915_gem_clflush_object(obj);
		i915_gem_gtt_bind_object(obj, obj->cache_level);
	}

	i915_gem_chipset_flush(dev);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct drm_device *dev,
				     struct sg_table *st,
				     unsigned int first_entry,
				     enum i915_cache_level level)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gen6_gtt_pte_t __iomem *gtt_entries =
		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(dev_priv->gtt.pte_encode(dev, addr, level),
			  &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different (NUMA-like) access patterns. Therefore, even with the way
	 * we assume hardware should work, we must keep this posting read for
	 * paranoia.
	 */
	if (i != 0)
		WARN_ON(readl(&gtt_entries[i-1])
			!= dev_priv->gtt.pte_encode(dev, addr, level));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen6_ggtt_clear_range(struct drm_device *dev,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = dev_priv->gtt.pte_encode(dev,
					       dev_priv->gtt.scratch_page_dma,
					       I915_CACHE_LLC);
	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_entries(struct drm_device *dev,
				     struct sg_table *st,
				     unsigned int pg_start,
				     enum i915_cache_level cache_level)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(st, pg_start, flags);
}

static void i915_ggtt_clear_range(struct drm_device *dev,
				  unsigned int first_entry,
				  unsigned int num_entries)
{
	intel_gtt_clear_range(first_entry, num_entries);
}

void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
			      enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
					 obj->gtt_space->start >> PAGE_SHIFT,
					 cache_level);

	obj->has_global_gtt_mapping = 1;
}

void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	dev_priv->gtt.gtt_clear_range(obj->base.dev,
				      obj->gtt_space->start >> PAGE_SHIFT,
				      obj->base.size >> PAGE_SHIFT);

	obj->has_global_gtt_mapping = 0;
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  unsigned long *start,
				  unsigned long *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}
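
/*
 * Illustrative note (added for clarity, not part of the original file):
 * i915_gtt_color_adjust() implements drm_mm cache coloring for non-LLC
 * platforms; the color is assumed to be the object's cache level. When a
 * node's neighbour has a different color, the usable hole is shrunk by one
 * 4096-byte page on that side, so differently-cached objects never end up
 * as immediate GTT neighbours.
 */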

void i915_gem_setup_global_gtt(struct drm_device *dev,
			       unsigned long start,
			       unsigned long mappable_end,
			       unsigned long end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
	if (!HAS_LLC(dev))
		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
			      obj->gtt_offset, obj->base.size);

		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
						     obj->gtt_offset,
						     obj->base.size,
						     false);
		obj->has_global_gtt_mapping = 1;
	}

	dev_priv->gtt.start = start;
	dev_priv->gtt.total = end - start;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space,
			     hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
					      (hole_end-hole_start) / PAGE_SIZE);
	}

	/* And finally clear the reserved guard page */
	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
}

static bool
intel_enable_ppgtt(struct drm_device *dev)
{
	if (i915_enable_ppgtt >= 0)
		return i915_enable_ppgtt;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
		return false;
#endif

	return true;
}
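
/*
 * Illustrative note (added for clarity, not part of the original file):
 * with aliasing PPGTT enabled below, the page directory entries are carved
 * out of global GTT PTEs. Each repurposed PTE would otherwise map one 4KiB
 * page, so reserving I915_PPGTT_PD_ENTRIES (512) of them costs 512 * 4096
 * bytes = 2MiB of GTT address space, which is exactly the amount gtt_size
 * is shrunk by (and restored by on failure).
 */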

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.total;
	mappable_size = dev_priv->gtt.mappable_end;

	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
		int ret;

		if (INTEL_INFO(dev)->gen <= 7) {
			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
			 * aperture accordingly when using aliasing ppgtt. */
			gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
		}

		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);

		ret = i915_gem_init_aliasing_ppgtt(dev);
		if (!ret)
			return;

		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
		drm_mm_takedown(&dev_priv->mm.gtt_space);
		gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
	}
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

static int setup_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct page *page;
	dma_addr_t dma_addr;

	page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
	if (page == NULL)
		return -ENOMEM;
	get_page(page);
	set_pages_uc(page, 1);

#ifdef CONFIG_INTEL_IOMMU
	dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (pci_dma_mapping_error(dev->pdev, dma_addr))
		return -EINVAL;
#else
	dma_addr = page_to_phys(page);
#endif
	dev_priv->gtt.scratch_page = page;
	dev_priv->gtt.scratch_page_dma = dma_addr;

	return 0;
}

static void teardown_scratch_page(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	set_pages_wb(dev_priv->gtt.scratch_page, 1);
	pci_unmap_page(dev->pdev, dev_priv->gtt.scratch_page_dma,
		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	put_page(dev_priv->gtt.scratch_page);
	__free_page(dev_priv->gtt.scratch_page);
}

static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}
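
/*
 * Illustrative note (added for clarity, not part of the original file):
 * the two helpers above decode the SNB_GMCH_CTRL config word: GGMS gives
 * the GTT size in 1MiB units and GMS the stolen memory size in 32MiB units.
 * A hypothetical GGMS field of 2 means a 2MiB GTT, i.e. 2MiB / 4 bytes per
 * PTE = 524288 PTEs covering 2GiB of GTT address space (the math done in
 * gen6_gmch_probe() below), while a GMS field of 2 means 64MiB stolen.
 */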

static int gen6_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	phys_addr_t gtt_bus_addr;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%lx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(dev);
	if (ret)
		DRM_ERROR("Scratch setup failed\n");

	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;

	return ret;
}

static void gen6_gmch_remove(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	iounmap(dev_priv->gtt.gsm);
	teardown_scratch_page(dev_priv->dev);
}

static int i915_gmch_probe(struct drm_device *dev,
			   size_t *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   unsigned long *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;

	return 0;
}

static void i915_gmch_remove(struct drm_device *dev)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		dev_priv->gtt.gtt_probe = i915_gmch_probe;
		dev_priv->gtt.gtt_remove = i915_gmch_remove;
	} else {
		dev_priv->gtt.gtt_probe = gen6_gmch_probe;
		dev_priv->gtt.gtt_remove = gen6_gmch_remove;
		if (IS_HASWELL(dev)) {
			dev_priv->gtt.pte_encode = hsw_pte_encode;
		} else if (IS_VALLEYVIEW(dev)) {
			dev_priv->gtt.pte_encode = byt_pte_encode;
		} else {
			dev_priv->gtt.pte_encode = gen6_pte_encode;
		}
	}

	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
				      &dev_priv->gtt.stolen_size,
				      &gtt->mappable_base,
				      &gtt->mappable_end);
	if (ret)
		return ret;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %zdM\n",
		 dev_priv->gtt.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %ldM\n",
			 dev_priv->gtt.mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n",
			 dev_priv->gtt.stolen_size >> 20);

	return 0;
}