1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2020 Intel Corporation 4 */ 5 6 #include <linux/slab.h> /* fault-inject.h is not standalone! */ 7 8 #include <linux/fault-inject.h> 9 10 #include "i915_trace.h" 11 #include "intel_gt.h" 12 #include "intel_gtt.h" 13 14 void stash_init(struct pagestash *stash) 15 { 16 pagevec_init(&stash->pvec); 17 spin_lock_init(&stash->lock); 18 } 19 20 static struct page *stash_pop_page(struct pagestash *stash) 21 { 22 struct page *page = NULL; 23 24 spin_lock(&stash->lock); 25 if (likely(stash->pvec.nr)) 26 page = stash->pvec.pages[--stash->pvec.nr]; 27 spin_unlock(&stash->lock); 28 29 return page; 30 } 31 32 static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec) 33 { 34 unsigned int nr; 35 36 spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING); 37 38 nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec)); 39 memcpy(stash->pvec.pages + stash->pvec.nr, 40 pvec->pages + pvec->nr - nr, 41 sizeof(pvec->pages[0]) * nr); 42 stash->pvec.nr += nr; 43 44 spin_unlock(&stash->lock); 45 46 pvec->nr -= nr; 47 } 48 49 static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) 50 { 51 struct pagevec stack; 52 struct page *page; 53 54 if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) 55 i915_gem_shrink_all(vm->i915); 56 57 page = stash_pop_page(&vm->free_pages); 58 if (page) 59 return page; 60 61 if (!vm->pt_kmap_wc) 62 return alloc_page(gfp); 63 64 /* Look in our global stash of WC pages... */ 65 page = stash_pop_page(&vm->i915->mm.wc_stash); 66 if (page) 67 return page; 68 69 /* 70 * Otherwise batch allocate pages to amortize cost of set_pages_wc. 71 * 72 * We have to be careful as page allocation may trigger the shrinker 73 * (via direct reclaim) which will fill up the WC stash underneath us. 74 * So we add our WB pages into a temporary pvec on the stack and merge 75 * them into the WC stash after all the allocations are complete. 76 */ 77 pagevec_init(&stack); 78 do { 79 struct page *page; 80 81 page = alloc_page(gfp); 82 if (unlikely(!page)) 83 break; 84 85 stack.pages[stack.nr++] = page; 86 } while (pagevec_space(&stack)); 87 88 if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) { 89 page = stack.pages[--stack.nr]; 90 91 /* Merge spare WC pages to the global stash */ 92 if (stack.nr) 93 stash_push_pagevec(&vm->i915->mm.wc_stash, &stack); 94 95 /* Push any surplus WC pages onto the local VM stash */ 96 if (stack.nr) 97 stash_push_pagevec(&vm->free_pages, &stack); 98 } 99 100 /* Return unwanted leftovers */ 101 if (unlikely(stack.nr)) { 102 WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr)); 103 __pagevec_release(&stack); 104 } 105 106 return page; 107 } 108 109 static void vm_free_pages_release(struct i915_address_space *vm, 110 bool immediate) 111 { 112 struct pagevec *pvec = &vm->free_pages.pvec; 113 struct pagevec stack; 114 115 lockdep_assert_held(&vm->free_pages.lock); 116 GEM_BUG_ON(!pagevec_count(pvec)); 117 118 if (vm->pt_kmap_wc) { 119 /* 120 * When we use WC, first fill up the global stash and then 121 * only if full immediately free the overflow. 122 */ 123 stash_push_pagevec(&vm->i915->mm.wc_stash, pvec); 124 125 /* 126 * As we have made some room in the VM's free_pages, 127 * we can wait for it to fill again. Unless we are 128 * inside i915_address_space_fini() and must 129 * immediately release the pages! 130 */ 131 if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1)) 132 return; 133 134 /* 135 * We have to drop the lock to allow ourselves to sleep, 136 * so take a copy of the pvec and clear the stash for 137 * others to use it as we sleep. 138 */ 139 stack = *pvec; 140 pagevec_reinit(pvec); 141 spin_unlock(&vm->free_pages.lock); 142 143 pvec = &stack; 144 set_pages_array_wb(pvec->pages, pvec->nr); 145 146 spin_lock(&vm->free_pages.lock); 147 } 148 149 __pagevec_release(pvec); 150 } 151 152 static void vm_free_page(struct i915_address_space *vm, struct page *page) 153 { 154 /* 155 * On !llc, we need to change the pages back to WB. We only do so 156 * in bulk, so we rarely need to change the page attributes here, 157 * but doing so requires a stop_machine() from deep inside arch/x86/mm. 158 * To make detection of the possible sleep more likely, use an 159 * unconditional might_sleep() for everybody. 160 */ 161 might_sleep(); 162 spin_lock(&vm->free_pages.lock); 163 while (!pagevec_space(&vm->free_pages.pvec)) 164 vm_free_pages_release(vm, false); 165 GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE); 166 pagevec_add(&vm->free_pages.pvec, page); 167 spin_unlock(&vm->free_pages.lock); 168 } 169 170 void __i915_vm_close(struct i915_address_space *vm) 171 { 172 struct i915_vma *vma, *vn; 173 174 mutex_lock(&vm->mutex); 175 list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { 176 struct drm_i915_gem_object *obj = vma->obj; 177 178 /* Keep the obj (and hence the vma) alive as _we_ destroy it */ 179 if (!kref_get_unless_zero(&obj->base.refcount)) 180 continue; 181 182 atomic_and(~I915_VMA_PIN_MASK, &vma->flags); 183 WARN_ON(__i915_vma_unbind(vma)); 184 __i915_vma_put(vma); 185 186 i915_gem_object_put(obj); 187 } 188 GEM_BUG_ON(!list_empty(&vm->bound_list)); 189 mutex_unlock(&vm->mutex); 190 } 191 192 void i915_address_space_fini(struct i915_address_space *vm) 193 { 194 spin_lock(&vm->free_pages.lock); 195 if (pagevec_count(&vm->free_pages.pvec)) 196 vm_free_pages_release(vm, true); 197 GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec)); 198 spin_unlock(&vm->free_pages.lock); 199 200 drm_mm_takedown(&vm->mm); 201 202 mutex_destroy(&vm->mutex); 203 } 204 205 static void __i915_vm_release(struct work_struct *work) 206 { 207 struct i915_address_space *vm = 208 container_of(work, struct i915_address_space, rcu.work); 209 210 vm->cleanup(vm); 211 i915_address_space_fini(vm); 212 213 kfree(vm); 214 } 215 216 void i915_vm_release(struct kref *kref) 217 { 218 struct i915_address_space *vm = 219 container_of(kref, struct i915_address_space, ref); 220 221 GEM_BUG_ON(i915_is_ggtt(vm)); 222 trace_i915_ppgtt_release(vm); 223 224 queue_rcu_work(vm->i915->wq, &vm->rcu); 225 } 226 227 void i915_address_space_init(struct i915_address_space *vm, int subclass) 228 { 229 kref_init(&vm->ref); 230 INIT_RCU_WORK(&vm->rcu, __i915_vm_release); 231 atomic_set(&vm->open, 1); 232 233 /* 234 * The vm->mutex must be reclaim safe (for use in the shrinker). 235 * Do a dummy acquire now under fs_reclaim so that any allocation 236 * attempt holding the lock is immediately reported by lockdep. 237 */ 238 mutex_init(&vm->mutex); 239 lockdep_set_subclass(&vm->mutex, subclass); 240 i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex); 241 242 GEM_BUG_ON(!vm->total); 243 drm_mm_init(&vm->mm, 0, vm->total); 244 vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; 245 246 stash_init(&vm->free_pages); 247 248 INIT_LIST_HEAD(&vm->bound_list); 249 } 250 251 void clear_pages(struct i915_vma *vma) 252 { 253 GEM_BUG_ON(!vma->pages); 254 255 if (vma->pages != vma->obj->mm.pages) { 256 sg_free_table(vma->pages); 257 kfree(vma->pages); 258 } 259 vma->pages = NULL; 260 261 memset(&vma->page_sizes, 0, sizeof(vma->page_sizes)); 262 } 263 264 static int __setup_page_dma(struct i915_address_space *vm, 265 struct i915_page_dma *p, 266 gfp_t gfp) 267 { 268 p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL); 269 if (unlikely(!p->page)) 270 return -ENOMEM; 271 272 p->daddr = dma_map_page_attrs(vm->dma, 273 p->page, 0, PAGE_SIZE, 274 PCI_DMA_BIDIRECTIONAL, 275 DMA_ATTR_SKIP_CPU_SYNC | 276 DMA_ATTR_NO_WARN); 277 if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { 278 vm_free_page(vm, p->page); 279 return -ENOMEM; 280 } 281 282 return 0; 283 } 284 285 int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) 286 { 287 return __setup_page_dma(vm, p, __GFP_HIGHMEM); 288 } 289 290 void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) 291 { 292 dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); 293 vm_free_page(vm, p->page); 294 } 295 296 void 297 fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count) 298 { 299 kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); 300 } 301 302 int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) 303 { 304 unsigned long size; 305 306 /* 307 * In order to utilize 64K pages for an object with a size < 2M, we will 308 * need to support a 64K scratch page, given that every 16th entry for a 309 * page-table operating in 64K mode must point to a properly aligned 64K 310 * region, including any PTEs which happen to point to scratch. 311 * 312 * This is only relevant for the 48b PPGTT where we support 313 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the 314 * scratch (read-only) between all vm, we create one 64k scratch page 315 * for all. 316 */ 317 size = I915_GTT_PAGE_SIZE_4K; 318 if (i915_vm_is_4lvl(vm) && 319 HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) { 320 size = I915_GTT_PAGE_SIZE_64K; 321 gfp |= __GFP_NOWARN; 322 } 323 gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; 324 325 do { 326 unsigned int order = get_order(size); 327 struct page *page; 328 dma_addr_t addr; 329 330 page = alloc_pages(gfp, order); 331 if (unlikely(!page)) 332 goto skip; 333 334 addr = dma_map_page_attrs(vm->dma, 335 page, 0, size, 336 PCI_DMA_BIDIRECTIONAL, 337 DMA_ATTR_SKIP_CPU_SYNC | 338 DMA_ATTR_NO_WARN); 339 if (unlikely(dma_mapping_error(vm->dma, addr))) 340 goto free_page; 341 342 if (unlikely(!IS_ALIGNED(addr, size))) 343 goto unmap_page; 344 345 vm->scratch[0].base.page = page; 346 vm->scratch[0].base.daddr = addr; 347 vm->scratch_order = order; 348 return 0; 349 350 unmap_page: 351 dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL); 352 free_page: 353 __free_pages(page, order); 354 skip: 355 if (size == I915_GTT_PAGE_SIZE_4K) 356 return -ENOMEM; 357 358 size = I915_GTT_PAGE_SIZE_4K; 359 gfp &= ~__GFP_NOWARN; 360 } while (1); 361 } 362 363 void cleanup_scratch_page(struct i915_address_space *vm) 364 { 365 struct i915_page_dma *p = px_base(&vm->scratch[0]); 366 unsigned int order = vm->scratch_order; 367 368 dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, 369 PCI_DMA_BIDIRECTIONAL); 370 __free_pages(p->page, order); 371 } 372 373 void free_scratch(struct i915_address_space *vm) 374 { 375 int i; 376 377 if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ 378 return; 379 380 for (i = 1; i <= vm->top; i++) { 381 if (!px_dma(&vm->scratch[i])) 382 break; 383 cleanup_page_dma(vm, px_base(&vm->scratch[i])); 384 } 385 386 cleanup_scratch_page(vm); 387 } 388 389 void gtt_write_workarounds(struct intel_gt *gt) 390 { 391 struct drm_i915_private *i915 = gt->i915; 392 struct intel_uncore *uncore = gt->uncore; 393 394 /* 395 * This function is for gtt related workarounds. This function is 396 * called on driver load and after a GPU reset, so you can place 397 * workarounds here even if they get overwritten by GPU reset. 398 */ 399 /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ 400 if (IS_BROADWELL(i915)) 401 intel_uncore_write(uncore, 402 GEN8_L3_LRA_1_GPGPU, 403 GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); 404 else if (IS_CHERRYVIEW(i915)) 405 intel_uncore_write(uncore, 406 GEN8_L3_LRA_1_GPGPU, 407 GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); 408 else if (IS_GEN9_LP(i915)) 409 intel_uncore_write(uncore, 410 GEN8_L3_LRA_1_GPGPU, 411 GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); 412 else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11) 413 intel_uncore_write(uncore, 414 GEN8_L3_LRA_1_GPGPU, 415 GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); 416 417 /* 418 * To support 64K PTEs we need to first enable the use of the 419 * Intermediate-Page-Size(IPS) bit of the PDE field via some magical 420 * mmio, otherwise the page-walker will simply ignore the IPS bit. This 421 * shouldn't be needed after GEN10. 422 * 423 * 64K pages were first introduced from BDW+, although technically they 424 * only *work* from gen9+. For pre-BDW we instead have the option for 425 * 32K pages, but we don't currently have any support for it in our 426 * driver. 427 */ 428 if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) && 429 INTEL_GEN(i915) <= 10) 430 intel_uncore_rmw(uncore, 431 GEN8_GAMW_ECO_DEV_RW_IA, 432 0, 433 GAMW_ECO_ENABLE_64K_IPS_FIELD); 434 435 if (IS_GEN_RANGE(i915, 8, 11)) { 436 bool can_use_gtt_cache = true; 437 438 /* 439 * According to the BSpec if we use 2M/1G pages then we also 440 * need to disable the GTT cache. At least on BDW we can see 441 * visual corruption when using 2M pages, and not disabling the 442 * GTT cache. 443 */ 444 if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M)) 445 can_use_gtt_cache = false; 446 447 /* WaGttCachingOffByDefault */ 448 intel_uncore_write(uncore, 449 HSW_GTT_CACHE_EN, 450 can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0); 451 WARN_ON_ONCE(can_use_gtt_cache && 452 intel_uncore_read(uncore, 453 HSW_GTT_CACHE_EN) == 0); 454 } 455 } 456 457 u64 gen8_pte_encode(dma_addr_t addr, 458 enum i915_cache_level level, 459 u32 flags) 460 { 461 gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; 462 463 if (unlikely(flags & PTE_READ_ONLY)) 464 pte &= ~_PAGE_RW; 465 466 switch (level) { 467 case I915_CACHE_NONE: 468 pte |= PPAT_UNCACHED; 469 break; 470 case I915_CACHE_WT: 471 pte |= PPAT_DISPLAY_ELLC; 472 break; 473 default: 474 pte |= PPAT_CACHED; 475 break; 476 } 477 478 return pte; 479 } 480 481 static void tgl_setup_private_ppat(struct intel_uncore *uncore) 482 { 483 /* TGL doesn't support LLC or AGE settings */ 484 intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB); 485 intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC); 486 intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT); 487 intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC); 488 intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB); 489 intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB); 490 intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB); 491 intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB); 492 } 493 494 static void cnl_setup_private_ppat(struct intel_uncore *uncore) 495 { 496 intel_uncore_write(uncore, 497 GEN10_PAT_INDEX(0), 498 GEN8_PPAT_WB | GEN8_PPAT_LLC); 499 intel_uncore_write(uncore, 500 GEN10_PAT_INDEX(1), 501 GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); 502 intel_uncore_write(uncore, 503 GEN10_PAT_INDEX(2), 504 GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); 505 intel_uncore_write(uncore, 506 GEN10_PAT_INDEX(3), 507 GEN8_PPAT_UC); 508 intel_uncore_write(uncore, 509 GEN10_PAT_INDEX(4), 510 GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); 511 intel_uncore_write(uncore, 512 GEN10_PAT_INDEX(5), 513 GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); 514 intel_uncore_write(uncore, 515 GEN10_PAT_INDEX(6), 516 GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)); 517 intel_uncore_write(uncore, 518 GEN10_PAT_INDEX(7), 519 GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 520 } 521 522 /* 523 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability 524 * bits. When using advanced contexts each context stores its own PAT, but 525 * writing this data shouldn't be harmful even in those cases. 526 */ 527 static void bdw_setup_private_ppat(struct intel_uncore *uncore) 528 { 529 u64 pat; 530 531 pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ 532 GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ 533 GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ 534 GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ 535 GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | 536 GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | 537 GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | 538 GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); 539 540 intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); 541 intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); 542 } 543 544 static void chv_setup_private_ppat(struct intel_uncore *uncore) 545 { 546 u64 pat; 547 548 /* 549 * Map WB on BDW to snooped on CHV. 550 * 551 * Only the snoop bit has meaning for CHV, the rest is 552 * ignored. 553 * 554 * The hardware will never snoop for certain types of accesses: 555 * - CPU GTT (GMADR->GGTT->no snoop->memory) 556 * - PPGTT page tables 557 * - some other special cycles 558 * 559 * As with BDW, we also need to consider the following for GT accesses: 560 * "For GGTT, there is NO pat_sel[2:0] from the entry, 561 * so RTL will always use the value corresponding to 562 * pat_sel = 000". 563 * Which means we must set the snoop bit in PAT entry 0 564 * in order to keep the global status page working. 565 */ 566 567 pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) | 568 GEN8_PPAT(1, 0) | 569 GEN8_PPAT(2, 0) | 570 GEN8_PPAT(3, 0) | 571 GEN8_PPAT(4, CHV_PPAT_SNOOP) | 572 GEN8_PPAT(5, CHV_PPAT_SNOOP) | 573 GEN8_PPAT(6, CHV_PPAT_SNOOP) | 574 GEN8_PPAT(7, CHV_PPAT_SNOOP); 575 576 intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat)); 577 intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); 578 } 579 580 void setup_private_pat(struct intel_uncore *uncore) 581 { 582 struct drm_i915_private *i915 = uncore->i915; 583 584 GEM_BUG_ON(INTEL_GEN(i915) < 8); 585 586 if (INTEL_GEN(i915) >= 12) 587 tgl_setup_private_ppat(uncore); 588 else if (INTEL_GEN(i915) >= 10) 589 cnl_setup_private_ppat(uncore); 590 else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915)) 591 chv_setup_private_ppat(uncore); 592 else 593 bdw_setup_private_ppat(uncore); 594 } 595 596 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 597 #include "selftests/mock_gtt.c" 598 #endif 599