// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	/*
	 * For pre-gen12 platforms pat_index is the same as enum
	 * i915_cache_level, so the switch-case here is still valid.
	 * See translation table defined by LEGACY_CACHELEVEL.
	 */
	switch (pat_index) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static u64 gen12_pte_encode(dma_addr_t addr,
			    unsigned int pat_index,
			    u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= GEN12_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= GEN12_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2))
		pte |= GEN12_PPGTT_PTE_PAT2;

	if (pat_index & BIT(3))
		pte |= MTL_PPGTT_PTE_PAT3;

	return pte;
}
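
/*
 * Tell the (virtualised) host that a full PPGTT has been created or is about
 * to be destroyed: the DMA address(es) of the top-level page directory are
 * written into the vgtif pdp registers before raising the g2v_notify message.
 */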
static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&i915->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = create ?
			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].lo),
					   lower_32_bits(daddr));
			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].hi),
					   upper_32_bits(daddr));
		}

		msg = create ?
			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

	mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);

	return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (ppgtt->pd)
		__gen8_ppgtt_cleanup(vm, ppgtt->pd,
				     gen8_pd_top_count(vm), vm->top);

	free_scratch(vm);
}
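
/*
 * Clear the range of PTE-index space [start, end) at the given level, writing
 * scratch entries over live PTEs and releasing any page tables (and
 * directories) that become completely unused along the way.
 */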
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt, lvl);
	} while (idx++, --len);

	return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
			   start, start + length, vm->top);
}
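
/*
 * Populate the range of PTE-index space [*start, end) at the given level,
 * taking preallocated page tables from the stash and installing them into
 * any empty directory slots encountered while walking down the tree.
 */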
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			       struct i915_vm_pt_stash *stash,
			       struct i915_page_directory * const pd,
			       u64 * const start, const u64 end, int lvl)
{
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			pt = stash->pt[!!lvl];
			__i915_gem_object_pin_pages(pt->base);

			fill_px(pt, vm->scratch[lvl]->encode);

			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx])) {
				stash->pt[!!lvl] = pt->stash;
				atomic_set(&pt->used, 0);
				set_pd_entry(pd, idx, pt);
			} else {
				pt = pd->entry[idx];
			}
		}

		if (lvl) {
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			__gen8_ppgtt_alloc(vm, stash,
					   as_pd(pt), start, end, lvl);

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
			     struct i915_vm_pt_stash *stash,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
			   &start, start + length, vm->top);
}

static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 u64 *start, u64 end, int lvl,
				 void (*fn)(struct i915_address_space *vm,
					    struct i915_page_table *pt,
					    void *data),
				 void *data)
{
	unsigned int idx, len;

	len = gen8_pd_range(*start, end, lvl--, &idx);

	spin_lock(&pd->lock);
	do {
		struct i915_page_table *pt = pd->entry[idx];

		atomic_inc(&pt->used);
		spin_unlock(&pd->lock);

		if (lvl) {
			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
					     fn, data);
		} else {
			fn(vm, pt, data);
			*start += gen8_pt_count(*start, end);
		}

		spin_lock(&pd->lock);
		atomic_dec(&pt->used);
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
			       u64 start, u64 length,
			       void (*fn)(struct i915_address_space *vm,
					  struct i915_page_table *pt,
					  void *data),
			       void *data)
{
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;

	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
			     &start, start + length, vm->top,
			     fn, data);
}
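
/*
 * Write 4K PTEs for the pages described by the sg iterator, starting at the
 * given PTE index and walking across page-table and page-directory
 * boundaries until the iterator (or, for 3-level tables, the PDP) runs out.
 */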
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      unsigned int pat_index,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
				idx = 0;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + sg_dma_len(iter->sg);
		}

		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);

	return idx;
}
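
/*
 * Huge-page insertion for platforms with 64K device pages: depending on
 * alignment, remaining length and whether the pages live in local memory,
 * each chunk is written either as a 2M PDE, via the compact 64K page-table
 * layout, or with the PS64 TLB hint, falling back to plain 4K PTEs.
 */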
static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  unsigned int pat_index,
			  u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;
	u64 end = start + vma_res->vma_size;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max, nent, i;

		max = I915_PDES;
		nent = 1;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				/*
				 * Device local-memory on these platforms should
				 * always use 64K pages or larger (including GTT
				 * alignment), therefore if we know the whole
				 * page-table needs to be filled we can always
				 * safely use the compact-layout. Otherwise fall
				 * back to the TLB hint with PS64. If this is
				 * system memory we only bother with PS64.
				 */
				if ((encode & GEN12_PPGTT_PTE_LM) &&
				    end - start >= SZ_2M && !index) {
					index = __gen8_pte_index(start, 0) / 16;
					page_size = I915_GTT_PAGE_SIZE_64K;

					max /= 16;

					vaddr = px_vaddr(pd);
					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

					pt->is_compact = true;
				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
					   rem >= I915_GTT_PAGE_SIZE_64K &&
					   !(index % 16)) {
					encode |= GEN12_PTE_PS64;
					page_size = I915_GTT_PAGE_SIZE_64K;
					nent = 16;
				}
			}

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(rem < page_size);

			for (i = 0; i < nent; i++) {
				vaddr[index++] =
					encode | (iter->dma + i *
						  I915_GTT_PAGE_SIZE);
			}

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}
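
/*
 * Huge-page insertion for gen8+ without 64K device pages: use 2M PDEs where
 * the chunk allows it, otherwise write 4K PTEs and, when an eligible page
 * table ends up fully covered, mark its PDE with the 64K IPS hint.
 */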
static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
				   unsigned int pat_index,
				   u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled whole page-table with 64K entries, or filled part of
		 * it and have reached the end of the sg table and we have
		 * enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vm) &&
		      !iter->sg && IS_ALIGNED(vma_res->start +
					      vma_res->node_size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = px_vaddr(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
				u16 i;

				encode = vm->scratch[0]->encode;
				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				drm_clflush_virt_range(vaddr, PAGE_SIZE);
			}
		}

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
			      struct i915_vma_resource *vma_res,
			      unsigned int pat_index,
			      u32 flags)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
	} else {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

		do {
			struct i915_page_directory * const pdp =
				gen8_pdp_for_page_index(vm, idx);

			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
						    pat_index, flags);
		} while (idx);

		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
	}
}
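
/*
 * Insert a single 4K entry at the given GTT offset, flushing the CPU cache
 * for just that PTE.
 */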
static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
				    dma_addr_t addr,
				    u64 offset,
				    unsigned int pat_index,
				    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(pt->is_compact);

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    unsigned int pat_index,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	/* XXX: we don't strictly need to use this layout */

	if (!pt->is_compact) {
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
				       dma_addr_t addr,
				       u64 offset,
				       unsigned int pat_index,
				       u32 flags)
{
	if (flags & PTE_LM)
		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
						       pat_index, flags);

	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}

static int gen8_init_scratch(struct i915_address_space *vm)
{
	u32 pte_flags;
	int ret;
	int i;

	/*
	 * If everybody agrees not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
		struct i915_address_space *clone = vm->gt->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		for (i = 0; i <= vm->top; i++)
			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

		return 0;
	}

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	pte_flags = vm->has_read_only;
	if (i915_gem_object_is_lmem(vm->scratch[0]))
		pte_flags |= PTE_LM;

	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       i915_gem_get_pat_index(vm->i915,
						      I915_CACHE_NONE),
			       pte_flags);

	for (i = 1; i <= vm->top; i++) {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto free_scratch;
		}

		ret = map_pt_dma(vm, obj);
		if (ret) {
			i915_gem_object_put(obj);
			goto free_scratch;
		}

		fill_px(obj, vm->scratch[i - 1]->encode);
		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

		vm->scratch[i] = obj;
	}

	return 0;

free_scratch:
	while (i--)
		i915_gem_object_put(vm->scratch[i]);
	vm->scratch[0] = NULL;
	return ret;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;
		int err;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		err = map_pt_dma(vm, pde->pt.base);
		if (err) {
			free_pd(vm, pde);
			return err;
		}

		fill_px(pde, vm->scratch[1]->encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}
	wmb();

	return 0;
}
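
/*
 * Allocate the top-level page directory, sized to cover vm->total, map it
 * for CPU access and fill it with scratch entries before returning it
 * pinned.
 */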
static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;
	int err;

	GEM_BUG_ON(count > I915_PDES);

	pd = __alloc_pd(count);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	err = map_pt_dma(vm, pd->pt.base);
	if (err)
		goto err_pd;

	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;

err_pd:
	free_pd(vm, pd);
	return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of legacy 32b
 * address space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_page_directory *pd;
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 *
	 * Gen12 has inherited the same read-only fault issue from gen11.
	 */
	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

	if (HAS_LMEM(gt->i915))
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
	else
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

	/*
	 * Using SMEM here instead of LMEM has the advantage of not reserving
	 * high performance memory for a "never" used filler page. It also
	 * removes the device access that would be required to initialise the
	 * scratch page, reducing pressure on an even scarcer resource.
	 */
	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;

	if (GRAPHICS_VER(gt->i915) >= 12)
		ppgtt->vm.pte_encode = gen12_pte_encode;
	else
		ppgtt->vm.pte_encode = gen8_pte_encode;

	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	if (HAS_64K_PAGES(gt->i915))
		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
	else
		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;
	ppgtt->vm.foreach = gen8_ppgtt_foreach;
	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_put;

	pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(pd)) {
		err = PTR_ERR(pd);
		goto err_put;
	}
	ppgtt->pd = pd;

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		err = gen8_preallocate_top_level_pdp(ppgtt);
		if (err)
			goto err_put;
	}

	if (intel_vgpu_active(gt->i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	return ppgtt;

err_put:
	i915_vm_put(&ppgtt->vm);
	return ERR_PTR(err);
}