1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2020 Intel Corporation 4 */ 5 6 #include <linux/log2.h> 7 8 #include "gem/i915_gem_lmem.h" 9 10 #include "gen8_ppgtt.h" 11 #include "i915_scatterlist.h" 12 #include "i915_trace.h" 13 #include "i915_pvinfo.h" 14 #include "i915_vgpu.h" 15 #include "intel_gt.h" 16 #include "intel_gtt.h" 17 18 static u64 gen8_pde_encode(const dma_addr_t addr, 19 const enum i915_cache_level level) 20 { 21 u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 22 23 if (level != I915_CACHE_NONE) 24 pde |= PPAT_CACHED_PDE; 25 else 26 pde |= PPAT_UNCACHED; 27 28 return pde; 29 } 30 31 static u64 gen8_pte_encode(dma_addr_t addr, 32 enum i915_cache_level level, 33 u32 flags) 34 { 35 gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 36 37 if (unlikely(flags & PTE_READ_ONLY)) 38 pte &= ~GEN8_PAGE_RW; 39 40 if (flags & PTE_LM) 41 pte |= GEN12_PPGTT_PTE_LM; 42 43 switch (level) { 44 case I915_CACHE_NONE: 45 pte |= PPAT_UNCACHED; 46 break; 47 case I915_CACHE_WT: 48 pte |= PPAT_DISPLAY_ELLC; 49 break; 50 default: 51 pte |= PPAT_CACHED; 52 break; 53 } 54 55 return pte; 56 } 57 58 static u64 mtl_pte_encode(dma_addr_t addr, 59 enum i915_cache_level level, 60 u32 flags) 61 { 62 gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW; 63 64 if (unlikely(flags & PTE_READ_ONLY)) 65 pte &= ~GEN8_PAGE_RW; 66 67 if (flags & PTE_LM) 68 pte |= GEN12_PPGTT_PTE_LM; 69 70 switch (level) { 71 case I915_CACHE_NONE: 72 pte |= GEN12_PPGTT_PTE_PAT1; 73 break; 74 case I915_CACHE_LLC: 75 case I915_CACHE_L3_LLC: 76 pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1; 77 break; 78 case I915_CACHE_WT: 79 pte |= GEN12_PPGTT_PTE_PAT0; 80 break; 81 } 82 83 return pte; 84 } 85 86 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) 87 { 88 struct drm_i915_private *i915 = ppgtt->vm.i915; 89 struct intel_uncore *uncore = ppgtt->vm.gt->uncore; 90 enum vgt_g2v_type msg; 91 int i; 92 93 if (create) 94 atomic_inc(px_used(ppgtt->pd)); /* never remove */ 95 else 96 atomic_dec(px_used(ppgtt->pd)); 97 98 mutex_lock(&i915->vgpu.lock); 99 100 if (i915_vm_is_4lvl(&ppgtt->vm)) { 101 const u64 daddr = px_dma(ppgtt->pd); 102 103 intel_uncore_write(uncore, 104 vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); 105 intel_uncore_write(uncore, 106 vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); 107 108 msg = create ? 109 VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : 110 VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY; 111 } else { 112 for (i = 0; i < GEN8_3LVL_PDPES; i++) { 113 const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); 114 115 intel_uncore_write(uncore, 116 vgtif_reg(pdp[i].lo), 117 lower_32_bits(daddr)); 118 intel_uncore_write(uncore, 119 vgtif_reg(pdp[i].hi), 120 upper_32_bits(daddr)); 121 } 122 123 msg = create ? 124 VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : 125 VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY; 126 } 127 128 /* g2v_notify atomically (via hv trap) consumes the message packet. */ 129 intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg); 130 131 mutex_unlock(&i915->vgpu.lock); 132 } 133 134 /* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ 135 #define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */ 136 #define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE)) 137 #define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64)) 138 #define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES)) 139 #define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) 140 #define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) 141 #define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) 142 143 #define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) 144 145 static unsigned int 146 gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx) 147 { 148 const int shift = gen8_pd_shift(lvl); 149 const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); 150 151 GEM_BUG_ON(start >= end); 152 end += ~mask >> gen8_pd_shift(1); 153 154 *idx = i915_pde_index(start, shift); 155 if ((start ^ end) & mask) 156 return GEN8_PDES - *idx; 157 else 158 return i915_pde_index(end, shift) - *idx; 159 } 160 161 static bool gen8_pd_contains(u64 start, u64 end, int lvl) 162 { 163 const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); 164 165 GEM_BUG_ON(start >= end); 166 return (start ^ end) & mask && (start & ~mask) == 0; 167 } 168 169 static unsigned int gen8_pt_count(u64 start, u64 end) 170 { 171 GEM_BUG_ON(start >= end); 172 if ((start ^ end) >> gen8_pd_shift(1)) 173 return GEN8_PDES - (start & (GEN8_PDES - 1)); 174 else 175 return end - start; 176 } 177 178 static unsigned int gen8_pd_top_count(const struct i915_address_space *vm) 179 { 180 unsigned int shift = __gen8_pte_shift(vm->top); 181 182 return (vm->total + (1ull << shift) - 1) >> shift; 183 } 184 185 static struct i915_page_directory * 186 gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx) 187 { 188 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); 189 190 if (vm->top == 2) 191 return ppgtt->pd; 192 else 193 return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top)); 194 } 195 196 static struct i915_page_directory * 197 gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr) 198 { 199 return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT); 200 } 201 202 static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, 203 struct i915_page_directory *pd, 204 int count, int lvl) 205 { 206 if (lvl) { 207 void **pde = pd->entry; 208 209 do { 210 if (!*pde) 211 continue; 212 213 __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1); 214 } while (pde++, --count); 215 } 216 217 free_px(vm, &pd->pt, lvl); 218 } 219 220 static void gen8_ppgtt_cleanup(struct i915_address_space *vm) 221 { 222 struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); 223 224 if (intel_vgpu_active(vm->i915)) 225 gen8_ppgtt_notify_vgt(ppgtt, false); 226 227 if (ppgtt->pd) 228 __gen8_ppgtt_cleanup(vm, ppgtt->pd, 229 gen8_pd_top_count(vm), vm->top); 230 231 free_scratch(vm); 232 } 233 234 static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, 235 struct i915_page_directory * const pd, 236 u64 start, const u64 end, int lvl) 237 { 238 const struct drm_i915_gem_object * const scratch = vm->scratch[lvl]; 239 unsigned int idx, len; 240 241 GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); 242 243 len = gen8_pd_range(start, end, lvl--, &idx); 244 DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", 245 __func__, vm, lvl + 1, start, end, 246 idx, len, atomic_read(px_used(pd))); 247 GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); 248 249 do { 250 struct i915_page_table *pt = pd->entry[idx]; 251 252 if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && 253 gen8_pd_contains(start, end, lvl)) { 254 DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", 255 __func__, vm, lvl + 1, idx, start, end); 256 clear_pd_entry(pd, idx, scratch); 257 __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); 258 start += (u64)I915_PDES << gen8_pd_shift(lvl); 259 continue; 260 } 261 262 if (lvl) { 263 start = __gen8_ppgtt_clear(vm, as_pd(pt), 264 start, end, lvl); 265 } else { 266 unsigned int count; 267 unsigned int pte = gen8_pd_index(start, 0); 268 unsigned int num_ptes; 269 u64 *vaddr; 270 271 count = gen8_pt_count(start, end); 272 DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", 273 __func__, vm, lvl, start, end, 274 gen8_pd_index(start, 0), count, 275 atomic_read(&pt->used)); 276 GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); 277 278 num_ptes = count; 279 if (pt->is_compact) { 280 GEM_BUG_ON(num_ptes % 16); 281 GEM_BUG_ON(pte % 16); 282 num_ptes /= 16; 283 pte /= 16; 284 } 285 286 vaddr = px_vaddr(pt); 287 memset64(vaddr + pte, 288 vm->scratch[0]->encode, 289 num_ptes); 290 291 atomic_sub(count, &pt->used); 292 start += count; 293 } 294 295 if (release_pd_entry(pd, idx, pt, scratch)) 296 free_px(vm, pt, lvl); 297 } while (idx++, --len); 298 299 return start; 300 } 301 302 static void gen8_ppgtt_clear(struct i915_address_space *vm, 303 u64 start, u64 length) 304 { 305 GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); 306 GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); 307 GEM_BUG_ON(range_overflows(start, length, vm->total)); 308 309 start >>= GEN8_PTE_SHIFT; 310 length >>= GEN8_PTE_SHIFT; 311 GEM_BUG_ON(length == 0); 312 313 __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, 314 start, start + length, vm->top); 315 } 316 317 static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, 318 struct i915_vm_pt_stash *stash, 319 struct i915_page_directory * const pd, 320 u64 * const start, const u64 end, int lvl) 321 { 322 unsigned int idx, len; 323 324 GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); 325 326 len = gen8_pd_range(*start, end, lvl--, &idx); 327 DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", 328 __func__, vm, lvl + 1, *start, end, 329 idx, len, atomic_read(px_used(pd))); 330 GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); 331 332 spin_lock(&pd->lock); 333 GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */ 334 do { 335 struct i915_page_table *pt = pd->entry[idx]; 336 337 if (!pt) { 338 spin_unlock(&pd->lock); 339 340 DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", 341 __func__, vm, lvl + 1, idx); 342 343 pt = stash->pt[!!lvl]; 344 __i915_gem_object_pin_pages(pt->base); 345 346 fill_px(pt, vm->scratch[lvl]->encode); 347 348 spin_lock(&pd->lock); 349 if (likely(!pd->entry[idx])) { 350 stash->pt[!!lvl] = pt->stash; 351 atomic_set(&pt->used, 0); 352 set_pd_entry(pd, idx, pt); 353 } else { 354 pt = pd->entry[idx]; 355 } 356 } 357 358 if (lvl) { 359 atomic_inc(&pt->used); 360 spin_unlock(&pd->lock); 361 362 __gen8_ppgtt_alloc(vm, stash, 363 as_pd(pt), start, end, lvl); 364 365 spin_lock(&pd->lock); 366 atomic_dec(&pt->used); 367 GEM_BUG_ON(!atomic_read(&pt->used)); 368 } else { 369 unsigned int count = gen8_pt_count(*start, end); 370 371 DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", 372 __func__, vm, lvl, *start, end, 373 gen8_pd_index(*start, 0), count, 374 atomic_read(&pt->used)); 375 376 atomic_add(count, &pt->used); 377 /* All other pdes may be simultaneously removed */ 378 GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES); 379 *start += count; 380 } 381 } while (idx++, --len); 382 spin_unlock(&pd->lock); 383 } 384 385 static void gen8_ppgtt_alloc(struct i915_address_space *vm, 386 struct i915_vm_pt_stash *stash, 387 u64 start, u64 length) 388 { 389 GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); 390 GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); 391 GEM_BUG_ON(range_overflows(start, length, vm->total)); 392 393 start >>= GEN8_PTE_SHIFT; 394 length >>= GEN8_PTE_SHIFT; 395 GEM_BUG_ON(length == 0); 396 397 __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd, 398 &start, start + length, vm->top); 399 } 400 401 static void __gen8_ppgtt_foreach(struct i915_address_space *vm, 402 struct i915_page_directory *pd, 403 u64 *start, u64 end, int lvl, 404 void (*fn)(struct i915_address_space *vm, 405 struct i915_page_table *pt, 406 void *data), 407 void *data) 408 { 409 unsigned int idx, len; 410 411 len = gen8_pd_range(*start, end, lvl--, &idx); 412 413 spin_lock(&pd->lock); 414 do { 415 struct i915_page_table *pt = pd->entry[idx]; 416 417 atomic_inc(&pt->used); 418 spin_unlock(&pd->lock); 419 420 if (lvl) { 421 __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl, 422 fn, data); 423 } else { 424 fn(vm, pt, data); 425 *start += gen8_pt_count(*start, end); 426 } 427 428 spin_lock(&pd->lock); 429 atomic_dec(&pt->used); 430 } while (idx++, --len); 431 spin_unlock(&pd->lock); 432 } 433 434 static void gen8_ppgtt_foreach(struct i915_address_space *vm, 435 u64 start, u64 length, 436 void (*fn)(struct i915_address_space *vm, 437 struct i915_page_table *pt, 438 void *data), 439 void *data) 440 { 441 start >>= GEN8_PTE_SHIFT; 442 length >>= GEN8_PTE_SHIFT; 443 444 __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd, 445 &start, start + length, vm->top, 446 fn, data); 447 } 448 449 static __always_inline u64 450 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, 451 struct i915_page_directory *pdp, 452 struct sgt_dma *iter, 453 u64 idx, 454 enum i915_cache_level cache_level, 455 u32 flags) 456 { 457 struct i915_page_directory *pd; 458 const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, flags); 459 gen8_pte_t *vaddr; 460 461 pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); 462 vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); 463 do { 464 GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE); 465 vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; 466 467 iter->dma += I915_GTT_PAGE_SIZE; 468 if (iter->dma >= iter->max) { 469 iter->sg = __sg_next(iter->sg); 470 if (!iter->sg || sg_dma_len(iter->sg) == 0) { 471 idx = 0; 472 break; 473 } 474 475 iter->dma = sg_dma_address(iter->sg); 476 iter->max = iter->dma + sg_dma_len(iter->sg); 477 } 478 479 if (gen8_pd_index(++idx, 0) == 0) { 480 if (gen8_pd_index(idx, 1) == 0) { 481 /* Limited by sg length for 3lvl */ 482 if (gen8_pd_index(idx, 2) == 0) 483 break; 484 485 pd = pdp->entry[gen8_pd_index(idx, 2)]; 486 } 487 488 drm_clflush_virt_range(vaddr, PAGE_SIZE); 489 vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1))); 490 } 491 } while (1); 492 drm_clflush_virt_range(vaddr, PAGE_SIZE); 493 494 return idx; 495 } 496 497 static void 498 xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm, 499 struct i915_vma_resource *vma_res, 500 struct sgt_dma *iter, 501 enum i915_cache_level cache_level, 502 u32 flags) 503 { 504 const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); 505 unsigned int rem = sg_dma_len(iter->sg); 506 u64 start = vma_res->start; 507 u64 end = start + vma_res->vma_size; 508 509 GEM_BUG_ON(!i915_vm_is_4lvl(vm)); 510 511 do { 512 struct i915_page_directory * const pdp = 513 gen8_pdp_for_page_address(vm, start); 514 struct i915_page_directory * const pd = 515 i915_pd_entry(pdp, __gen8_pte_index(start, 2)); 516 struct i915_page_table *pt = 517 i915_pt_entry(pd, __gen8_pte_index(start, 1)); 518 gen8_pte_t encode = pte_encode; 519 unsigned int page_size; 520 gen8_pte_t *vaddr; 521 u16 index, max, nent, i; 522 523 max = I915_PDES; 524 nent = 1; 525 526 if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && 527 IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && 528 rem >= I915_GTT_PAGE_SIZE_2M && 529 !__gen8_pte_index(start, 0)) { 530 index = __gen8_pte_index(start, 1); 531 encode |= GEN8_PDE_PS_2M; 532 page_size = I915_GTT_PAGE_SIZE_2M; 533 534 vaddr = px_vaddr(pd); 535 } else { 536 index = __gen8_pte_index(start, 0); 537 page_size = I915_GTT_PAGE_SIZE; 538 539 if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 540 /* 541 * Device local-memory on these platforms should 542 * always use 64K pages or larger (including GTT 543 * alignment), therefore if we know the whole 544 * page-table needs to be filled we can always 545 * safely use the compact-layout. Otherwise fall 546 * back to the TLB hint with PS64. If this is 547 * system memory we only bother with PS64. 548 */ 549 if ((encode & GEN12_PPGTT_PTE_LM) && 550 end - start >= SZ_2M && !index) { 551 index = __gen8_pte_index(start, 0) / 16; 552 page_size = I915_GTT_PAGE_SIZE_64K; 553 554 max /= 16; 555 556 vaddr = px_vaddr(pd); 557 vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K; 558 559 pt->is_compact = true; 560 } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && 561 rem >= I915_GTT_PAGE_SIZE_64K && 562 !(index % 16)) { 563 encode |= GEN12_PTE_PS64; 564 page_size = I915_GTT_PAGE_SIZE_64K; 565 nent = 16; 566 } 567 } 568 569 vaddr = px_vaddr(pt); 570 } 571 572 do { 573 GEM_BUG_ON(rem < page_size); 574 575 for (i = 0; i < nent; i++) { 576 vaddr[index++] = 577 encode | (iter->dma + i * 578 I915_GTT_PAGE_SIZE); 579 } 580 581 start += page_size; 582 iter->dma += page_size; 583 rem -= page_size; 584 if (iter->dma >= iter->max) { 585 iter->sg = __sg_next(iter->sg); 586 if (!iter->sg) 587 break; 588 589 rem = sg_dma_len(iter->sg); 590 if (!rem) 591 break; 592 593 iter->dma = sg_dma_address(iter->sg); 594 iter->max = iter->dma + rem; 595 596 if (unlikely(!IS_ALIGNED(iter->dma, page_size))) 597 break; 598 } 599 } while (rem >= page_size && index < max); 600 601 vma_res->page_sizes_gtt |= page_size; 602 } while (iter->sg && sg_dma_len(iter->sg)); 603 } 604 605 static void gen8_ppgtt_insert_huge(struct i915_address_space *vm, 606 struct i915_vma_resource *vma_res, 607 struct sgt_dma *iter, 608 enum i915_cache_level cache_level, 609 u32 flags) 610 { 611 const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags); 612 unsigned int rem = sg_dma_len(iter->sg); 613 u64 start = vma_res->start; 614 615 GEM_BUG_ON(!i915_vm_is_4lvl(vm)); 616 617 do { 618 struct i915_page_directory * const pdp = 619 gen8_pdp_for_page_address(vm, start); 620 struct i915_page_directory * const pd = 621 i915_pd_entry(pdp, __gen8_pte_index(start, 2)); 622 gen8_pte_t encode = pte_encode; 623 unsigned int maybe_64K = -1; 624 unsigned int page_size; 625 gen8_pte_t *vaddr; 626 u16 index; 627 628 if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M && 629 IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && 630 rem >= I915_GTT_PAGE_SIZE_2M && 631 !__gen8_pte_index(start, 0)) { 632 index = __gen8_pte_index(start, 1); 633 encode |= GEN8_PDE_PS_2M; 634 page_size = I915_GTT_PAGE_SIZE_2M; 635 636 vaddr = px_vaddr(pd); 637 } else { 638 struct i915_page_table *pt = 639 i915_pt_entry(pd, __gen8_pte_index(start, 1)); 640 641 index = __gen8_pte_index(start, 0); 642 page_size = I915_GTT_PAGE_SIZE; 643 644 if (!index && 645 vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && 646 IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && 647 (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || 648 rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) 649 maybe_64K = __gen8_pte_index(start, 1); 650 651 vaddr = px_vaddr(pt); 652 } 653 654 do { 655 GEM_BUG_ON(sg_dma_len(iter->sg) < page_size); 656 vaddr[index++] = encode | iter->dma; 657 658 start += page_size; 659 iter->dma += page_size; 660 rem -= page_size; 661 if (iter->dma >= iter->max) { 662 iter->sg = __sg_next(iter->sg); 663 if (!iter->sg) 664 break; 665 666 rem = sg_dma_len(iter->sg); 667 if (!rem) 668 break; 669 670 iter->dma = sg_dma_address(iter->sg); 671 iter->max = iter->dma + rem; 672 673 if (maybe_64K != -1 && index < I915_PDES && 674 !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && 675 (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || 676 rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) 677 maybe_64K = -1; 678 679 if (unlikely(!IS_ALIGNED(iter->dma, page_size))) 680 break; 681 } 682 } while (rem >= page_size && index < I915_PDES); 683 684 drm_clflush_virt_range(vaddr, PAGE_SIZE); 685 686 /* 687 * Is it safe to mark the 2M block as 64K? -- Either we have 688 * filled whole page-table with 64K entries, or filled part of 689 * it and have reached the end of the sg table and we have 690 * enough padding. 691 */ 692 if (maybe_64K != -1 && 693 (index == I915_PDES || 694 (i915_vm_has_scratch_64K(vm) && 695 !iter->sg && IS_ALIGNED(vma_res->start + 696 vma_res->node_size, 697 I915_GTT_PAGE_SIZE_2M)))) { 698 vaddr = px_vaddr(pd); 699 vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; 700 drm_clflush_virt_range(vaddr, PAGE_SIZE); 701 page_size = I915_GTT_PAGE_SIZE_64K; 702 703 /* 704 * We write all 4K page entries, even when using 64K 705 * pages. In order to verify that the HW isn't cheating 706 * by using the 4K PTE instead of the 64K PTE, we want 707 * to remove all the surplus entries. If the HW skipped 708 * the 64K PTE, it will read/write into the scratch page 709 * instead - which we detect as missing results during 710 * selftests. 711 */ 712 if (I915_SELFTEST_ONLY(vm->scrub_64K)) { 713 u16 i; 714 715 encode = vm->scratch[0]->encode; 716 vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K)); 717 718 for (i = 1; i < index; i += 16) 719 memset64(vaddr + i, encode, 15); 720 721 drm_clflush_virt_range(vaddr, PAGE_SIZE); 722 } 723 } 724 725 vma_res->page_sizes_gtt |= page_size; 726 } while (iter->sg && sg_dma_len(iter->sg)); 727 } 728 729 static void gen8_ppgtt_insert(struct i915_address_space *vm, 730 struct i915_vma_resource *vma_res, 731 enum i915_cache_level cache_level, 732 u32 flags) 733 { 734 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm); 735 struct sgt_dma iter = sgt_dma(vma_res); 736 737 if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) { 738 if (HAS_64K_PAGES(vm->i915)) 739 xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); 740 else 741 gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags); 742 } else { 743 u64 idx = vma_res->start >> GEN8_PTE_SHIFT; 744 745 do { 746 struct i915_page_directory * const pdp = 747 gen8_pdp_for_page_index(vm, idx); 748 749 idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx, 750 cache_level, flags); 751 } while (idx); 752 753 vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; 754 } 755 } 756 757 static void gen8_ppgtt_insert_entry(struct i915_address_space *vm, 758 dma_addr_t addr, 759 u64 offset, 760 enum i915_cache_level level, 761 u32 flags) 762 { 763 u64 idx = offset >> GEN8_PTE_SHIFT; 764 struct i915_page_directory * const pdp = 765 gen8_pdp_for_page_index(vm, idx); 766 struct i915_page_directory *pd = 767 i915_pd_entry(pdp, gen8_pd_index(idx, 2)); 768 struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); 769 gen8_pte_t *vaddr; 770 771 GEM_BUG_ON(pt->is_compact); 772 773 vaddr = px_vaddr(pt); 774 vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags); 775 drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr)); 776 } 777 778 static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm, 779 dma_addr_t addr, 780 u64 offset, 781 enum i915_cache_level level, 782 u32 flags) 783 { 784 u64 idx = offset >> GEN8_PTE_SHIFT; 785 struct i915_page_directory * const pdp = 786 gen8_pdp_for_page_index(vm, idx); 787 struct i915_page_directory *pd = 788 i915_pd_entry(pdp, gen8_pd_index(idx, 2)); 789 struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1)); 790 gen8_pte_t *vaddr; 791 792 GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K)); 793 GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K)); 794 795 /* XXX: we don't strictly need to use this layout */ 796 797 if (!pt->is_compact) { 798 vaddr = px_vaddr(pd); 799 vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K; 800 pt->is_compact = true; 801 } 802 803 vaddr = px_vaddr(pt); 804 vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags); 805 } 806 807 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm, 808 dma_addr_t addr, 809 u64 offset, 810 enum i915_cache_level level, 811 u32 flags) 812 { 813 if (flags & PTE_LM) 814 return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset, 815 level, flags); 816 817 return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags); 818 } 819 820 static int gen8_init_scratch(struct i915_address_space *vm) 821 { 822 u32 pte_flags; 823 int ret; 824 int i; 825 826 /* 827 * If everybody agrees to not to write into the scratch page, 828 * we can reuse it for all vm, keeping contexts and processes separate. 829 */ 830 if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) { 831 struct i915_address_space *clone = vm->gt->vm; 832 833 GEM_BUG_ON(!clone->has_read_only); 834 835 vm->scratch_order = clone->scratch_order; 836 for (i = 0; i <= vm->top; i++) 837 vm->scratch[i] = i915_gem_object_get(clone->scratch[i]); 838 839 return 0; 840 } 841 842 ret = setup_scratch_page(vm); 843 if (ret) 844 return ret; 845 846 pte_flags = vm->has_read_only; 847 if (i915_gem_object_is_lmem(vm->scratch[0])) 848 pte_flags |= PTE_LM; 849 850 vm->scratch[0]->encode = 851 vm->pte_encode(px_dma(vm->scratch[0]), 852 I915_CACHE_NONE, pte_flags); 853 854 for (i = 1; i <= vm->top; i++) { 855 struct drm_i915_gem_object *obj; 856 857 obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); 858 if (IS_ERR(obj)) { 859 ret = PTR_ERR(obj); 860 goto free_scratch; 861 } 862 863 ret = map_pt_dma(vm, obj); 864 if (ret) { 865 i915_gem_object_put(obj); 866 goto free_scratch; 867 } 868 869 fill_px(obj, vm->scratch[i - 1]->encode); 870 obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE); 871 872 vm->scratch[i] = obj; 873 } 874 875 return 0; 876 877 free_scratch: 878 while (i--) 879 i915_gem_object_put(vm->scratch[i]); 880 vm->scratch[0] = NULL; 881 return ret; 882 } 883 884 static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) 885 { 886 struct i915_address_space *vm = &ppgtt->vm; 887 struct i915_page_directory *pd = ppgtt->pd; 888 unsigned int idx; 889 890 GEM_BUG_ON(vm->top != 2); 891 GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES); 892 893 for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { 894 struct i915_page_directory *pde; 895 int err; 896 897 pde = alloc_pd(vm); 898 if (IS_ERR(pde)) 899 return PTR_ERR(pde); 900 901 err = map_pt_dma(vm, pde->pt.base); 902 if (err) { 903 free_pd(vm, pde); 904 return err; 905 } 906 907 fill_px(pde, vm->scratch[1]->encode); 908 set_pd_entry(pd, idx, pde); 909 atomic_inc(px_used(pde)); /* keep pinned */ 910 } 911 wmb(); 912 913 return 0; 914 } 915 916 static struct i915_page_directory * 917 gen8_alloc_top_pd(struct i915_address_space *vm) 918 { 919 const unsigned int count = gen8_pd_top_count(vm); 920 struct i915_page_directory *pd; 921 int err; 922 923 GEM_BUG_ON(count > I915_PDES); 924 925 pd = __alloc_pd(count); 926 if (unlikely(!pd)) 927 return ERR_PTR(-ENOMEM); 928 929 pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K); 930 if (IS_ERR(pd->pt.base)) { 931 err = PTR_ERR(pd->pt.base); 932 pd->pt.base = NULL; 933 goto err_pd; 934 } 935 936 err = map_pt_dma(vm, pd->pt.base); 937 if (err) 938 goto err_pd; 939 940 fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count); 941 atomic_inc(px_used(pd)); /* mark as pinned */ 942 return pd; 943 944 err_pd: 945 free_pd(vm, pd); 946 return ERR_PTR(err); 947 } 948 949 /* 950 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers 951 * with a net effect resembling a 2-level page table in normal x86 terms. Each 952 * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address 953 * space. 954 * 955 */ 956 struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, 957 unsigned long lmem_pt_obj_flags) 958 { 959 struct i915_page_directory *pd; 960 struct i915_ppgtt *ppgtt; 961 int err; 962 963 ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); 964 if (!ppgtt) 965 return ERR_PTR(-ENOMEM); 966 967 ppgtt_init(ppgtt, gt, lmem_pt_obj_flags); 968 ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; 969 ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t)); 970 971 /* 972 * From bdw, there is hw support for read-only pages in the PPGTT. 973 * 974 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support 975 * for now. 976 * 977 * Gen12 has inherited the same read-only fault issue from gen11. 978 */ 979 ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); 980 981 if (HAS_LMEM(gt->i915)) 982 ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; 983 else 984 ppgtt->vm.alloc_pt_dma = alloc_pt_dma; 985 986 /* 987 * Using SMEM here instead of LMEM has the advantage of not reserving 988 * high performance memory for a "never" used filler page. It also 989 * removes the device access that would be required to initialise the 990 * scratch page, reducing pressure on an even scarcer resource. 991 */ 992 ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; 993 994 if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) 995 ppgtt->vm.pte_encode = mtl_pte_encode; 996 else 997 ppgtt->vm.pte_encode = gen8_pte_encode; 998 999 ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND; 1000 ppgtt->vm.insert_entries = gen8_ppgtt_insert; 1001 if (HAS_64K_PAGES(gt->i915)) 1002 ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry; 1003 else 1004 ppgtt->vm.insert_page = gen8_ppgtt_insert_entry; 1005 ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; 1006 ppgtt->vm.clear_range = gen8_ppgtt_clear; 1007 ppgtt->vm.foreach = gen8_ppgtt_foreach; 1008 ppgtt->vm.cleanup = gen8_ppgtt_cleanup; 1009 1010 err = gen8_init_scratch(&ppgtt->vm); 1011 if (err) 1012 goto err_put; 1013 1014 pd = gen8_alloc_top_pd(&ppgtt->vm); 1015 if (IS_ERR(pd)) { 1016 err = PTR_ERR(pd); 1017 goto err_put; 1018 } 1019 ppgtt->pd = pd; 1020 1021 if (!i915_vm_is_4lvl(&ppgtt->vm)) { 1022 err = gen8_preallocate_top_level_pdp(ppgtt); 1023 if (err) 1024 goto err_put; 1025 } 1026 1027 if (intel_vgpu_active(gt->i915)) 1028 gen8_ppgtt_notify_vgt(ppgtt, true); 1029 1030 return ppgtt; 1031 1032 err_put: 1033 i915_vm_put(&ppgtt->vm); 1034 return ERR_PTR(err); 1035 } 1036