// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>

#include "gem/i915_gem_lmem.h"
#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"

struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	/*
	 * To avoid severe over-allocation when dealing with min_page_size
	 * restrictions, we override that behaviour here by allowing an object
	 * size and page layout which can be smaller. In practice this should be
	 * totally fine, since GTT paging structures are not typically inserted
	 * into the GTT.
	 *
	 * Note that we also hit this path for the scratch page, and for this
	 * case it might need to be 64K, but that should work fine here since we
	 * used the passed in size for the page size, which should ensure it
	 * also has the same alignment.
	 */
	obj = __i915_gem_object_create_lmem_with_ps(vm->i915, sz, sz,
						    vm->lmem_pt_obj_flags);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
	struct drm_i915_gem_object *obj;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	obj = i915_gem_object_create_internal(vm->i915, sz);
	/*
	 * Ensure all paging structures for this vm share the same dma-resv
	 * object underneath, with the idea that one object_lock() will lock
	 * them all at once.
	 */
	if (!IS_ERR(obj)) {
		obj->base.resv = i915_vm_resv_get(vm);
		obj->shares_resv_from = vm;
	}

	return obj;
}

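/*
 * Pin a CPU mapping of a page-table object so its entries can be written
 * directly, and mark it unshrinkable since the GTT relies on the backing
 * store staying resident; the _locked variant assumes the caller already
 * holds the object lock.
 */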
int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
	enum i915_map_type type;
	void *vaddr;

	type = i915_coherent_map_type(vm->i915, obj, true);
	vaddr = i915_gem_object_pin_map(obj, type);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	i915_gem_object_make_unshrinkable(obj);
	return 0;
}

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
		return;

	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		__i915_vma_put(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));

	mutex_unlock(&vm->mutex);
}

/* lock the vm into the current ww, if we lock one, we lock all */
int i915_vm_lock_objects(struct i915_address_space *vm,
			 struct i915_gem_ww_ctx *ww)
{
	if (vm->scratch[0]->base.resv == &vm->_resv) {
		return i915_gem_object_lock(vm->scratch[0], ww);
	} else {
		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

		/* We borrowed the scratch page from ggtt, take the top level object */
		return i915_gem_object_lock(ppgtt->pd->pt.base, ww);
	}
}

void i915_address_space_fini(struct i915_address_space *vm)
{
	drm_mm_takedown(&vm->mm);
	mutex_destroy(&vm->mutex);
}

/**
 * i915_vm_resv_release - Final struct i915_address_space destructor
 * @kref: Pointer to the &i915_address_space.resv_ref member.
 *
 * This function is called when the last lock sharer no longer shares the
 * &i915_address_space._resv lock.
 */
void i915_vm_resv_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, typeof(*vm), resv_ref);

	dma_resv_fini(&vm->_resv);
	kfree(vm);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, release_work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	i915_vm_resv_put(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_work(vm->i915->wq, &vm->release_work);
}

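/*
 * Common setup shared by ggtt and ppgtt: reference counts, the reclaim-safe
 * vm->mutex, the shared dma-resv and the drm_mm range manager covering
 * vm->total.
 */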
void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);

	/*
	 * Special case for GGTT that has already done an early
	 * kref_init here.
	 */
	if (!kref_read(&vm->resv_ref))
		kref_init(&vm->resv_ref);

	INIT_WORK(&vm->release_work, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);

	if (!intel_vm_no_concurrent_access_wa(vm->i915)) {
		i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
	} else {
		/*
		 * The CHV + BXT VTD workaround uses stop_machine(),
		 * which is allowed to allocate memory. This means &vm->mutex
		 * is the outer lock, and in theory we can allocate memory inside
		 * it through stop_machine().
		 *
		 * Add the annotation for this; we use trylock in the shrinker.
		 */
		mutex_acquire(&vm->mutex.dep_map, 0, 0, _THIS_IP_);
		might_alloc(GFP_KERNEL);
		mutex_release(&vm->mutex.dep_map, _THIS_IP_);
	}
	dma_resv_init(&vm->_resv);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	INIT_LIST_HEAD(&vm->bound_list);
}

void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

void *__px_vaddr(struct drm_i915_gem_object *p)
{
	enum i915_map_type type;

	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return page_unpack_bits(p->mm.mapping, &type);
}

dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_dma_address(p->mm.pages->sgl);
}

struct page *__px_page(struct drm_i915_gem_object *p)
{
	GEM_BUG_ON(!i915_gem_object_has_pages(p));
	return sg_page(p->mm.pages->sgl);
}

void
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
	void *vaddr = __px_vaddr(p);

	memset64(vaddr, val, count);
	clflush_cache_range(vaddr, PAGE_SIZE);
}

static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
	void *vaddr = __px_vaddr(scratch);
	u8 val;

	val = 0;
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		val = POISON_FREE;

	memset(vaddr, val, scratch->base.size);
}

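/*
 * Allocate and map the scratch page that backs otherwise-unused PTEs,
 * preferring a 64K page on address spaces that can use 64K GTT pages and
 * falling back to 4K; the page is poisoned on debug builds so stray
 * reads are easy to spot.
 */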
int setup_scratch_page(struct i915_address_space *vm)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
		size = I915_GTT_PAGE_SIZE_64K;

	do {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, size);
		if (IS_ERR(obj))
			goto skip;

		if (map_pt_dma(vm, obj))
			goto skip_obj;

		/* We need a single contiguous page for our scratch */
		if (obj->mm.page_sizes.sg < size)
			goto skip_obj;

		/* And it needs to be correspondingly aligned */
		if (__px_dma(obj) & (size - 1))
			goto skip_obj;

		/*
		 * Use a non-zero scratch page for debugging.
		 *
		 * We want a value that should be reasonably obvious
		 * to spot in the error state, while also causing a GPU hang
		 * if executed. We prefer using a clear page in production, so
		 * should it ever be accidentally used, the effect should be
		 * fairly benign.
		 */
		poison_scratch_page(obj);

		vm->scratch[0] = obj;
		vm->scratch_order = get_order(size);
		return 0;

skip_obj:
		i915_gem_object_put(obj);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
	} while (1);
}

void free_scratch(struct i915_address_space *vm)
{
	int i;

	for (i = 0; i <= vm->top; i++)
		i915_gem_object_put(vm->scratch[i]);
}

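/*
 * GTT-related workarounds applied on driver load and after GPU reset:
 * larger default TLB entries, the 64K IPS bit where 64K pages are
 * supported, and disabling the GTT cache when 2M pages may be used.
 */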
void gtt_write_workarounds(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;

	/*
	 * This function is for GTT-related workarounds. It is called on driver
	 * load and after a GPU reset, so you can place workarounds here even
	 * if they get overwritten by a GPU reset.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
	if (IS_BROADWELL(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_LP(i915))
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
	else if (GRAPHICS_VER(i915) >= 9 && GRAPHICS_VER(i915) <= 11)
		intel_uncore_write(uncore,
				   GEN8_L3_LRA_1_GPGPU,
				   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);

	/*
	 * To support 64K PTEs we need to first enable the use of the
	 * Intermediate-Page-Size (IPS) bit of the PDE field via some magical
	 * mmio, otherwise the page-walker will simply ignore the IPS bit. This
	 * shouldn't be needed after GEN10.
	 *
	 * 64K pages were first introduced from BDW+, although technically they
	 * only *work* from gen9+. For pre-BDW we instead have the option for
	 * 32K pages, but we don't currently have any support for it in our
	 * driver.
	 */
	if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
	    GRAPHICS_VER(i915) <= 10)
		intel_uncore_rmw(uncore,
				 GEN8_GAMW_ECO_DEV_RW_IA,
				 0,
				 GAMW_ECO_ENABLE_64K_IPS_FIELD);

	if (IS_GRAPHICS_VER(i915, 8, 11)) {
		bool can_use_gtt_cache = true;

		/*
		 * According to the BSpec, if we use 2M/1G pages then we also
		 * need to disable the GTT cache. At least on BDW we can see
		 * visual corruption when using 2M pages and not disabling the
		 * GTT cache.
		 */
		if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
			can_use_gtt_cache = false;

		/* WaGttCachingOffByDefault */
		intel_uncore_write(uncore,
				   HSW_GTT_CACHE_EN,
				   can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
		drm_WARN_ON_ONCE(&i915->drm, can_use_gtt_cache &&
				 intel_uncore_read(uncore,
						   HSW_GTT_CACHE_EN) == 0);
	}
}

static void tgl_setup_private_ppat(struct intel_uncore *uncore)
{
	/* TGL doesn't support LLC or AGE settings */
	intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
	intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}

static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(0),
			   GEN8_PPAT_WB | GEN8_PPAT_LLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(1),
			   GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(2),
			   GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(3),
			   GEN8_PPAT_UC);
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(4),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(5),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(6),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
	intel_uncore_write(uncore,
			   GEN10_PAT_INDEX(7),
			   GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}

/*
 * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases.
 */
static void bdw_setup_private_ppat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;
	u64 pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) |	/* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) |	/* for something pointing to ptes? */
	      GEN8_PPAT(3, GEN8_PPAT_UC) |			/* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	/* for scanout with eLLC */
	if (GRAPHICS_VER(i915) >= 9)
		pat |= GEN8_PPAT(2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
	else
		pat |= GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
	u64 pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */

	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
	intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}

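/*
 * Program the private PPAT for the platform: gen12+ uses a fixed table
 * without LLC/AGE settings, gen11 writes the per-index PAT registers,
 * CHV and gen9 LP reuse the snoop-only table, and everything else gets
 * the BDW layout.
 */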
void setup_private_pat(struct intel_uncore *uncore)
{
	struct drm_i915_private *i915 = uncore->i915;

	GEM_BUG_ON(GRAPHICS_VER(i915) < 8);

	if (GRAPHICS_VER(i915) >= 12)
		tgl_setup_private_ppat(uncore);
	else if (GRAPHICS_VER(i915) >= 11)
		icl_setup_private_ppat(uncore);
	else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
		chv_setup_private_ppat(uncore);
	else
		bdw_setup_private_ppat(uncore);
}

struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		i915_gem_object_put(obj);
		return vma;
	}

	return vma;
}

struct i915_vma *
__vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long size)
{
	struct i915_vma *vma;
	int err;

	vma = __vm_create_scratch_for_read(vm, size);
	if (IS_ERR(vma))
		return vma;

	err = i915_vma_pin(vma, 0, 0,
			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err) {
		i915_vma_put(vma);
		return ERR_PTR(err);
	}

	return vma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#endif