// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   unsigned int pat_index,
			   u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	/*
	 * For pre-gen12 platforms pat_index is the same as enum
	 * i915_cache_level, so the switch-case here is still valid.
	 * See translation table defined by LEGACY_CACHELEVEL.
	 */
	switch (pat_index) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static u64 gen12_pte_encode(dma_addr_t addr,
			    unsigned int pat_index,
			    u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	if (pat_index & BIT(0))
		pte |= GEN12_PPGTT_PTE_PAT0;

	if (pat_index & BIT(1))
		pte |= GEN12_PPGTT_PTE_PAT1;

	if (pat_index & BIT(2))
		pte |= GEN12_PPGTT_PTE_PAT2;

	if (pat_index & BIT(3))
		pte |= MTL_PPGTT_PTE_PAT3;

	return pte;
}

static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&i915->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = create ?
			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].lo),
					   lower_32_bits(daddr));
			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].hi),
					   upper_32_bits(daddr));
		}

		msg = create ?
			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

	mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
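
/*
 * Example of how the helpers above decompose an address: with 4K pages and
 * 512 (GEN8_PDES) entries per table, a 4-level vm (vm->top == 3) splits a
 * 48b offset as [47:39] PML4E, [38:30] PDPE, [29:21] PDE and [20:12] PTE,
 * with [11:0] the byte offset inside the page. Most of the walkers below
 * operate on indices that have already been shifted down by GEN8_PTE_SHIFT,
 * i.e. gen8_pd_shift(lvl) rather than __gen8_pte_shift(lvl).
 */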

static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);

	return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (ppgtt->pd)
		__gen8_ppgtt_cleanup(vm, ppgtt->pd,
				     gen8_pd_top_count(vm), vm->top);

	free_scratch(vm);
}
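
/*
 * Note for the walkers below: once gen8_ppgtt_clear()/gen8_ppgtt_alloc()
 * have shifted the range down by GEN8_PTE_SHIFT, start/end/length are
 * expressed in units of 4K pages rather than bytes. The clear walk can
 * tear down entire child directories when the range spans them completely,
 * and otherwise recurses down to replace individual PTEs with the scratch
 * encoding.
 */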

static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt, lvl);
	} while (idx++, --len);

	return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
			   start, start + length, vm->top);
}
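
/*
 * The allocation walk below never allocates memory itself: every page table
 * or directory it installs is popped from the stash the caller prepared
 * beforehand (struct i915_vm_pt_stash), which is why __gen8_ppgtt_alloc()
 * returns void and cannot fail with -ENOMEM. If another thread populates
 * the same slot first, the already-initialised table is simply left in the
 * stash.
 */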

static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			       struct i915_vm_pt_stash *stash,
			       struct i915_page_directory * const pd,
			       u64 * const start, const u64 end, int lvl)
{
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			pt = stash->pt[!!lvl];
			__i915_gem_object_pin_pages(pt->base);

			fill_px(pt, vm->scratch[lvl]->encode);

			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx])) {
				stash->pt[!!lvl] = pt->stash;
				atomic_set(&pt->used, 0);
				set_pd_entry(pd, idx, pt);
			} else {
				pt = pd->entry[idx];
			}
		}

		if (lvl) {
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			__gen8_ppgtt_alloc(vm, stash,
					   as_pd(pt), start, end, lvl);

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
			     struct i915_vm_pt_stash *stash,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
			   &start, start + length, vm->top);
}

static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 u64 *start, u64 end, int lvl,
				 void (*fn)(struct i915_address_space *vm,
					    struct i915_page_table *pt,
					    void *data),
				 void *data)
{
	unsigned int idx, len;

	len = gen8_pd_range(*start, end, lvl--, &idx);

	spin_lock(&pd->lock);
	do {
		struct i915_page_table *pt = pd->entry[idx];

		atomic_inc(&pt->used);
		spin_unlock(&pd->lock);

		if (lvl) {
			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
					     fn, data);
		} else {
			fn(vm, pt, data);
			*start += gen8_pt_count(*start, end);
		}

		spin_lock(&pd->lock);
		atomic_dec(&pt->used);
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
			       u64 start, u64 length,
			       void (*fn)(struct i915_address_space *vm,
					  struct i915_page_table *pt,
					  void *data),
			       void *data)
{
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;

	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
			     &start, start + length, vm->top,
			     fn, data);
}
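
/*
 * Single-page (4K PTE) insertion helper used when no huge pages are
 * available: it maps one page table at a time, flushes each written page of
 * PTEs with drm_clflush_virt_range() and returns the page index to continue
 * from (0 once the scatterlist has been fully consumed).
 */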

static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      unsigned int pat_index,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, pat_index, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
				idx = 0;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + sg_dma_len(iter->sg);
		}

		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	drm_clflush_virt_range(vaddr, PAGE_SIZE);

	return idx;
}
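
/*
 * On xehpsdv-class platforms the huge-page path can pick between three
 * encodings per 2M block: a single 2M PDE (GEN8_PDE_PS_2M), the compact 64K
 * page-table layout (GEN12_PDE_64K, 32 entries covering 2M), or the PS64 TLB
 * hint on runs of 16 contiguous 4K PTEs (GEN12_PTE_PS64), falling back to
 * ordinary 4K PTEs otherwise.
 */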

static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  unsigned int pat_index,
			  u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;
	u64 end = start + vma_res->vma_size;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max, nent, i;

		max = I915_PDES;
		nent = 1;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
				/*
				 * Device local-memory on these platforms should
				 * always use 64K pages or larger (including GTT
				 * alignment), therefore if we know the whole
				 * page-table needs to be filled we can always
				 * safely use the compact-layout. Otherwise fall
				 * back to the TLB hint with PS64. If this is
				 * system memory we only bother with PS64.
				 */
				if ((encode & GEN12_PPGTT_PTE_LM) &&
				    end - start >= SZ_2M && !index) {
					index = __gen8_pte_index(start, 0) / 16;
					page_size = I915_GTT_PAGE_SIZE_64K;

					max /= 16;

					vaddr = px_vaddr(pd);
					vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

					pt->is_compact = true;
				} else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
					   rem >= I915_GTT_PAGE_SIZE_64K &&
					   !(index % 16)) {
					encode |= GEN12_PTE_PS64;
					page_size = I915_GTT_PAGE_SIZE_64K;
					nent = 16;
				}
			}

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(rem < page_size);

			for (i = 0; i < nent; i++) {
				vaddr[index++] =
					encode | (iter->dma + i *
						  I915_GTT_PAGE_SIZE);
			}

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);
		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}
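
/*
 * Huge-page path for gen8..gen12 (pre-xehpsdv): it only knows about 2M PDEs
 * (GEN8_PDE_PS_2M) and the per-2M IPS 64K hint (GEN8_PDE_IPS_64K); there is
 * no compact page-table layout or PS64 here. All 4K PTEs are written even
 * when the 64K hint is set, which the selftest-only scrub_64K pass below
 * exploits to catch hardware ignoring the hint.
 */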

static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
				   unsigned int pat_index,
				   u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		drm_clflush_virt_range(vaddr, PAGE_SIZE);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled the whole page-table with 64K entries, or we have
		 * filled part of it and have reached the end of the sg table
		 * and we have enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vm) &&
		      !iter->sg && IS_ALIGNED(vma_res->start +
					      vma_res->node_size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = px_vaddr(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			drm_clflush_virt_range(vaddr, PAGE_SIZE);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
				u16 i;

				encode = vm->scratch[0]->encode;
				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				drm_clflush_virt_range(vaddr, PAGE_SIZE);
			}
		}

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
			      struct i915_vma_resource *vma_res,
			      unsigned int pat_index,
			      u32 flags)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (GRAPHICS_VER_FULL(vm->i915) >= IP_VER(12, 50))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, pat_index, flags);
	} else {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

		do {
			struct i915_page_directory * const pdp =
				gen8_pdp_for_page_index(vm, idx);

			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
						    pat_index, flags);
		} while (idx);

		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
	}
}

static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
				    dma_addr_t addr,
				    u64 offset,
				    unsigned int pat_index,
				    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(pt->is_compact);

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, pat_index, flags);
	drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    unsigned int pat_index,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	/* XXX: we don't strictly need to use this layout */

	if (!pt->is_compact) {
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, pat_index, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
				       dma_addr_t addr,
				       u64 offset,
				       unsigned int pat_index,
				       u32 flags)
{
	if (flags & PTE_LM)
		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
						       pat_index, flags);

	return gen8_ppgtt_insert_entry(vm, addr, offset, pat_index, flags);
}
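
/*
 * Scratch setup: vm->scratch[0] is the 4K scratch page itself, and each
 * vm->scratch[lvl] above it is a page table/directory whose entries all
 * point at vm->scratch[lvl - 1]. Unpopulated parts of the address space
 * therefore always resolve to the scratch page.
 */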

static int gen8_init_scratch(struct i915_address_space *vm)
{
	u32 pte_flags;
	int ret;
	int i;

	/*
	 * If everybody agrees not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
		struct i915_address_space *clone = vm->gt->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		for (i = 0; i <= vm->top; i++)
			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

		return 0;
	}

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	pte_flags = vm->has_read_only;
	if (i915_gem_object_is_lmem(vm->scratch[0]))
		pte_flags |= PTE_LM;

	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       i915_gem_get_pat_index(vm->i915,
						      I915_CACHE_NONE),
			       pte_flags);

	for (i = 1; i <= vm->top; i++) {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
		if (IS_ERR(obj)) {
			ret = PTR_ERR(obj);
			goto free_scratch;
		}

		ret = map_pt_dma(vm, obj);
		if (ret) {
			i915_gem_object_put(obj);
			goto free_scratch;
		}

		fill_px(obj, vm->scratch[i - 1]->encode);
		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

		vm->scratch[i] = obj;
	}

	return 0;

free_scratch:
	while (i--)
		i915_gem_object_put(vm->scratch[i]);
	vm->scratch[0] = NULL;
	return ret;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;
		int err;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		err = map_pt_dma(vm, pde->pt.base);
		if (err) {
			free_pd(vm, pde);
			return err;
		}

		fill_px(pde, vm->scratch[1]->encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}
	wmb();

	return 0;
}
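
/*
 * The top level directory is not necessarily a full 512-entry page:
 * gen8_pd_top_count() yields 512 PML4 entries for a 4-level (48b) vm but
 * only GEN8_3LVL_PDPES (4) PDP entries for a 3-level (32b) vm.
 */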

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;
	int err;

	GEM_BUG_ON(count > I915_PDES);

	pd = __alloc_pd(count);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	err = map_pt_dma(vm, pd->pt.base);
	if (err)
		goto err_pd;

	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;

err_pd:
	free_pd(vm, pd);
	return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of legacy 32b
 * address space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_page_directory *pd;
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 *
	 * Gen12 has inherited the same read-only fault issue from gen11.
	 */
	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

	if (HAS_LMEM(gt->i915))
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
	else
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

	/*
	 * Using SMEM here instead of LMEM has the advantage of not reserving
	 * high performance memory for a "never" used filler page. It also
	 * removes the device access that would be required to initialise the
	 * scratch page, reducing pressure on an even scarcer resource.
	 */
	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;

	if (GRAPHICS_VER(gt->i915) >= 12)
		ppgtt->vm.pte_encode = gen12_pte_encode;
	else
		ppgtt->vm.pte_encode = gen8_pte_encode;

	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	if (HAS_64K_PAGES(gt->i915))
		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
	else
		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;
	ppgtt->vm.foreach = gen8_ppgtt_foreach;
	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_put;

	pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(pd)) {
		err = PTR_ERR(pd);
		goto err_put;
	}
	ppgtt->pd = pd;

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		err = gen8_preallocate_top_level_pdp(ppgtt);
		if (err)
			goto err_put;
	}

	if (intel_vgpu_active(gt->i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	return ppgtt;

err_put:
	i915_vm_put(&ppgtt->vm);
	return ERR_PTR(err);
}