// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}
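
/*
 * Note that the insertion paths below call gen8_pte_encode() (directly or
 * via vm->pte_encode) with a zero address to build a template of the
 * flag/PPAT bits once, and then OR each dma address into that template as
 * the individual PTEs are written.
 */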

static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&i915->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		intel_uncore_write(uncore,
				   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = create ?
			VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].lo),
					   lower_32_bits(daddr));
			intel_uncore_write(uncore,
					   vgtif_reg(pdp[i].hi),
					   upper_32_bits(daddr));
		}

		msg = create ?
			VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
			VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

	mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
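
/*
 * Worked example of the decomposition above: GEN8_PDES = SZ_4K / sizeof(u64)
 * = 512, so each level resolves 9 bits of the address. For a 4-level (48b)
 * vm, a GPU virtual address therefore splits as
 *
 *   bits 47..39 - index into the top-level pml4 (lvl 3)
 *   bits 38..30 - index into the pdp            (lvl 2)
 *   bits 29..21 - index into the pd             (lvl 1)
 *   bits 20..12 - index into the pt             (lvl 0)
 *   bits 11..0  - offset within the 4K page
 *
 * __gen8_pte_index(addr, lvl) extracts these fields from a byte address,
 * while gen8_pd_index(idx, lvl) does the same for an address that has
 * already been shifted down by GEN8_PTE_SHIFT (i.e. a PTE index).
 */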

static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);

	return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
	free_scratch(vm);
}
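
/*
 * The __gen8_ppgtt_clear() and __gen8_ppgtt_alloc() walks below operate on
 * [start, end) expressed in PTE indices, i.e. the byte range has already
 * been shifted down by GEN8_PTE_SHIFT by their callers. Each recursion step
 * peels off one level of the radix tree, using gen8_pd_range() to find the
 * span of entries touched within the current directory.
 */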

static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			unsigned int pte = gen8_pd_index(start, 0);
			unsigned int num_ptes;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,
			    atomic_read(&pt->used));
			GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

			num_ptes = count;
			if (pt->is_compact) {
				GEM_BUG_ON(num_ptes % 16);
				GEM_BUG_ON(pte % 16);
				num_ptes /= 16;
				pte /= 16;
			}

			vaddr = px_vaddr(pt);
			memset64(vaddr + pte,
				 vm->scratch[0]->encode,
				 num_ptes);

			atomic_sub(count, &pt->used);
			start += count;
		}

		if (release_pd_entry(pd, idx, pt, scratch))
			free_px(vm, pt, lvl);
	} while (idx++, --len);

	return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
			   start, start + length, vm->top);
}
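
/*
 * Page tables are not allocated in the walk below; they are popped from the
 * i915_vm_pt_stash that the caller filled beforehand, so no allocations are
 * needed under pd->lock. If another thread raced us and already installed
 * an entry, the stashed table is simply left for a later level or a later
 * call to consume.
 */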

static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
			       struct i915_vm_pt_stash *stash,
			       struct i915_page_directory * const pd,
			       u64 * const start, const u64 end, int lvl)
{
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(*start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, *start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

	spin_lock(&pd->lock);
	GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (!pt) {
			spin_unlock(&pd->lock);

			DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
			    __func__, vm, lvl + 1, idx);

			pt = stash->pt[!!lvl];
			__i915_gem_object_pin_pages(pt->base);

			fill_px(pt, vm->scratch[lvl]->encode);

			spin_lock(&pd->lock);
			if (likely(!pd->entry[idx])) {
				stash->pt[!!lvl] = pt->stash;
				atomic_set(&pt->used, 0);
				set_pd_entry(pd, idx, pt);
			} else {
				pt = pd->entry[idx];
			}
		}

		if (lvl) {
			atomic_inc(&pt->used);
			spin_unlock(&pd->lock);

			__gen8_ppgtt_alloc(vm, stash,
					   as_pd(pt), start, end, lvl);

			spin_lock(&pd->lock);
			atomic_dec(&pt->used);
			GEM_BUG_ON(!atomic_read(&pt->used));
		} else {
			unsigned int count = gen8_pt_count(*start, end);

			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
			    __func__, vm, lvl, *start, end,
			    gen8_pd_index(*start, 0), count,
			    atomic_read(&pt->used));

			atomic_add(count, &pt->used);
			/* All other pdes may be simultaneously removed */
			GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
			*start += count;
		}
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
			     struct i915_vm_pt_stash *stash,
			     u64 start, u64 length)
{
	GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
	GEM_BUG_ON(range_overflows(start, length, vm->total));

	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;
	GEM_BUG_ON(length == 0);

	__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
			   &start, start + length, vm->top);
}

static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 u64 *start, u64 end, int lvl,
				 void (*fn)(struct i915_address_space *vm,
					    struct i915_page_table *pt,
					    void *data),
				 void *data)
{
	unsigned int idx, len;

	len = gen8_pd_range(*start, end, lvl--, &idx);

	spin_lock(&pd->lock);
	do {
		struct i915_page_table *pt = pd->entry[idx];

		atomic_inc(&pt->used);
		spin_unlock(&pd->lock);

		if (lvl) {
			__gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
					     fn, data);
		} else {
			fn(vm, pt, data);
			*start += gen8_pt_count(*start, end);
		}

		spin_lock(&pd->lock);
		atomic_dec(&pt->used);
	} while (idx++, --len);
	spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
			       u64 start, u64 length,
			       void (*fn)(struct i915_address_space *vm,
					  struct i915_page_table *pt,
					  void *data),
			       void *data)
{
	start >>= GEN8_PTE_SHIFT;
	length >>= GEN8_PTE_SHIFT;

	__gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
			     &start, start + length, vm->top,
			     fn, data);
}

static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
		      struct i915_page_directory *pdp,
		      struct sgt_dma *iter,
		      u64 idx,
		      enum i915_cache_level cache_level,
		      u32 flags)
{
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	gen8_pte_t *vaddr;

	pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
	do {
		GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
		vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

		iter->dma += I915_GTT_PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg || sg_dma_len(iter->sg) == 0) {
				idx = 0;
				break;
			}

			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + sg_dma_len(iter->sg);
		}

		if (gen8_pd_index(++idx, 0) == 0) {
			if (gen8_pd_index(idx, 1) == 0) {
				/* Limited by sg length for 3lvl */
				if (gen8_pd_index(idx, 2) == 0)
					break;

				pd = pdp->entry[gen8_pd_index(idx, 2)];
			}

			clflush_cache_range(vaddr, PAGE_SIZE);
			vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
		}
	} while (1);
	clflush_cache_range(vaddr, PAGE_SIZE);

	return idx;
}
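
/*
 * On platforms with 64K GTT pages (HAS_64K_PAGES()), page tables that map
 * local memory are written in "compact" form: the PDE is tagged with
 * GEN12_PDE_64K and PTE indices/counts are divided by 16, so at most 32
 * 64K entries cover the same 2M span as a regular 512-entry table of 4K
 * pages. pt->is_compact records that a table has been switched into this
 * mode, and __gen8_ppgtt_clear() above applies the same /16 scaling when
 * pointing those entries back at scratch.
 */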

static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
			  struct i915_vma_resource *vma_res,
			  struct sgt_dma *iter,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		struct i915_page_table *pt =
			i915_pt_entry(pd, __gen8_pte_index(start, 1));
		gen8_pte_t encode = pte_encode;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index, max;

		max = I915_PDES;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			if (encode & GEN12_PPGTT_PTE_LM) {
				GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
				GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
				GEM_BUG_ON(!IS_ALIGNED(iter->dma,
						       I915_GTT_PAGE_SIZE_64K));

				index = __gen8_pte_index(start, 0) / 16;
				page_size = I915_GTT_PAGE_SIZE_64K;

				max /= 16;

				vaddr = px_vaddr(pd);
				vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

				pt->is_compact = true;
			} else {
				GEM_BUG_ON(pt->is_compact);
				index = __gen8_pte_index(start, 0);
				page_size = I915_GTT_PAGE_SIZE;
			}

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(rem < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < max);

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
				   struct i915_vma_resource *vma_res,
				   struct sgt_dma *iter,
				   enum i915_cache_level cache_level,
				   u32 flags)
{
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
	unsigned int rem = sg_dma_len(iter->sg);
	u64 start = vma_res->start;

	GEM_BUG_ON(!i915_vm_is_4lvl(vm));

	do {
		struct i915_page_directory * const pdp =
			gen8_pdp_for_page_address(vm, start);
		struct i915_page_directory * const pd =
			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
		gen8_pte_t encode = pte_encode;
		unsigned int maybe_64K = -1;
		unsigned int page_size;
		gen8_pte_t *vaddr;
		u16 index;

		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
		    rem >= I915_GTT_PAGE_SIZE_2M &&
		    !__gen8_pte_index(start, 0)) {
			index = __gen8_pte_index(start, 1);
			encode |= GEN8_PDE_PS_2M;
			page_size = I915_GTT_PAGE_SIZE_2M;

			vaddr = px_vaddr(pd);
		} else {
			struct i915_page_table *pt =
				i915_pt_entry(pd, __gen8_pte_index(start, 1));

			index = __gen8_pte_index(start, 0);
			page_size = I915_GTT_PAGE_SIZE;

			if (!index &&
			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
				maybe_64K = __gen8_pte_index(start, 1);

			vaddr = px_vaddr(pt);
		}

		do {
			GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
			vaddr[index++] = encode | iter->dma;

			start += page_size;
			iter->dma += page_size;
			rem -= page_size;
			if (iter->dma >= iter->max) {
				iter->sg = __sg_next(iter->sg);
				if (!iter->sg)
					break;

				rem = sg_dma_len(iter->sg);
				if (!rem)
					break;

				iter->dma = sg_dma_address(iter->sg);
				iter->max = iter->dma + rem;

				if (maybe_64K != -1 && index < I915_PDES &&
				    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
				      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
				       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
					maybe_64K = -1;

				if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
					break;
			}
		} while (rem >= page_size && index < I915_PDES);

		clflush_cache_range(vaddr, PAGE_SIZE);

		/*
		 * Is it safe to mark the 2M block as 64K? -- Either we have
		 * filled the whole page-table with 64K entries, or we have
		 * filled part of it and reached the end of the sg table with
		 * enough padding.
		 */
		if (maybe_64K != -1 &&
		    (index == I915_PDES ||
		     (i915_vm_has_scratch_64K(vm) &&
		      !iter->sg && IS_ALIGNED(vma_res->start +
					      vma_res->node_size,
					      I915_GTT_PAGE_SIZE_2M)))) {
			vaddr = px_vaddr(pd);
			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
			clflush_cache_range(vaddr, PAGE_SIZE);
			page_size = I915_GTT_PAGE_SIZE_64K;

			/*
			 * We write all 4K page entries, even when using 64K
			 * pages. In order to verify that the HW isn't cheating
			 * by using the 4K PTE instead of the 64K PTE, we want
			 * to remove all the surplus entries. If the HW skipped
			 * the 64K PTE, it will read/write into the scratch page
			 * instead - which we detect as missing results during
			 * selftests.
			 */
			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
				u16 i;

				encode = vm->scratch[0]->encode;
				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

				for (i = 1; i < index; i += 16)
					memset64(vaddr + i, encode, 15);

				clflush_cache_range(vaddr, PAGE_SIZE);
			}
		}

		vma_res->page_sizes_gtt |= page_size;
	} while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
			      struct i915_vma_resource *vma_res,
			      enum i915_cache_level cache_level,
			      u32 flags)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = sgt_dma(vma_res);

	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
		if (HAS_64K_PAGES(vm->i915))
			xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
		else
			gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
	} else {
		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

		do {
			struct i915_page_directory * const pdp =
				gen8_pdp_for_page_index(vm, idx);

			idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
						    cache_level, flags);
		} while (idx);

		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
	}
}

static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
				    dma_addr_t addr,
				    u64 offset,
				    enum i915_cache_level level,
				    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(pt->is_compact);

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
	clflush_cache_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
					    dma_addr_t addr,
					    u64 offset,
					    enum i915_cache_level level,
					    u32 flags)
{
	u64 idx = offset >> GEN8_PTE_SHIFT;
	struct i915_page_directory * const pdp =
		gen8_pdp_for_page_index(vm, idx);
	struct i915_page_directory *pd =
		i915_pd_entry(pdp, gen8_pd_index(idx, 2));
	struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
	gen8_pte_t *vaddr;

	GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
	GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

	if (!pt->is_compact) {
		vaddr = px_vaddr(pd);
		vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
		pt->is_compact = true;
	}

	vaddr = px_vaddr(pt);
	vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
				       dma_addr_t addr,
				       u64 offset,
				       enum i915_cache_level level,
				       u32 flags)
{
	if (flags & PTE_LM)
		return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
						       level, flags);

	return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
}
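
/*
 * Scratch is built bottom-up: scratch[0] is the data page every unused PTE
 * points at, and each scratch[i] (1 <= i <= vm->top) is a page directory
 * whose entries all reference scratch[i - 1]. A freshly allocated table at
 * level lvl can therefore be initialised by filling it with
 * vm->scratch[lvl]->encode, as done in __gen8_ppgtt_alloc().
 */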

static int gen8_init_scratch(struct i915_address_space *vm)
{
	u32 pte_flags;
	int ret;
	int i;

	/*
	 * If everybody agrees not to write into the scratch page,
	 * we can reuse it for all vm, keeping contexts and processes separate.
	 */
	if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
		struct i915_address_space *clone = vm->gt->vm;

		GEM_BUG_ON(!clone->has_read_only);

		vm->scratch_order = clone->scratch_order;
		for (i = 0; i <= vm->top; i++)
			vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

		return 0;
	}

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	pte_flags = vm->has_read_only;
	if (i915_gem_object_is_lmem(vm->scratch[0]))
		pte_flags |= PTE_LM;

	vm->scratch[0]->encode =
		gen8_pte_encode(px_dma(vm->scratch[0]),
				I915_CACHE_NONE, pte_flags);

	for (i = 1; i <= vm->top; i++) {
		struct drm_i915_gem_object *obj;

		obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
		if (IS_ERR(obj))
			goto free_scratch;

		ret = map_pt_dma(vm, obj);
		if (ret) {
			i915_gem_object_put(obj);
			goto free_scratch;
		}

		fill_px(obj, vm->scratch[i - 1]->encode);
		obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

		vm->scratch[i] = obj;
	}

	return 0;

free_scratch:
	while (i--)
		i915_gem_object_put(vm->scratch[i]);
	return -ENOMEM;
}
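
/*
 * For the 3-level (32b) layout, the four top-level PDP entries are not
 * allocated on demand. They are preallocated here, filled with scratch
 * entries and kept pinned (px_used incremented) for the lifetime of the
 * ppgtt, so the four PDP registers described in the comment above
 * gen8_ppgtt_create() always have valid directories to point at.
 */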

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->vm;
	struct i915_page_directory *pd = ppgtt->pd;
	unsigned int idx;

	GEM_BUG_ON(vm->top != 2);
	GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

	for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
		struct i915_page_directory *pde;
		int err;

		pde = alloc_pd(vm);
		if (IS_ERR(pde))
			return PTR_ERR(pde);

		err = map_pt_dma(vm, pde->pt.base);
		if (err) {
			free_pd(vm, pde);
			return err;
		}

		fill_px(pde, vm->scratch[1]->encode);
		set_pd_entry(pd, idx, pde);
		atomic_inc(px_used(pde)); /* keep pinned */
	}
	wmb();

	return 0;
}

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
	const unsigned int count = gen8_pd_top_count(vm);
	struct i915_page_directory *pd;
	int err;

	GEM_BUG_ON(count > I915_PDES);

	pd = __alloc_pd(count);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	err = map_pt_dma(vm, pd->pt.base);
	if (err)
		goto err_pd;

	fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
	atomic_inc(px_used(pd)); /* mark as pinned */
	return pd;

err_pd:
	free_pd(vm, pd);
	return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
 * space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
	ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
	ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

	/*
	 * From bdw, there is hw support for read-only pages in the PPGTT.
	 *
	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
	 * for now.
	 *
	 * Gen12 has inherited the same read-only fault issue from gen11.
	 */
	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

	if (HAS_LMEM(gt->i915)) {
		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;

		/*
		 * On some platforms the hw has dropped support for 4K GTT pages
		 * when dealing with LMEM, and due to the design of 64K GTT
		 * pages in the hw, we can only mark the *entire* page-table as
		 * operating in 64K GTT mode, since the enable bit is still on
		 * the pde, and not the pte. And since we still need to allow
		 * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
		 * page-table with scratch pointing to LMEM, since that's
		 * undefined from the hw pov. The simplest solution is to just
		 * move the 64K scratch page to SMEM on such platforms and call
		 * it a day, since that should work for all configurations.
		 */
		if (HAS_64K_PAGES(gt->i915))
			ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
		else
			ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
	} else {
		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
		ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
	}

	err = gen8_init_scratch(&ppgtt->vm);
	if (err)
		goto err_free;

	ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
	if (IS_ERR(ppgtt->pd)) {
		err = PTR_ERR(ppgtt->pd);
		goto err_free_scratch;
	}

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		err = gen8_preallocate_top_level_pdp(ppgtt);
		if (err)
			goto err_free_pd;
	}

	ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->vm.insert_entries = gen8_ppgtt_insert;
	if (HAS_64K_PAGES(gt->i915))
		ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
	else
		ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
	ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
	ppgtt->vm.clear_range = gen8_ppgtt_clear;
	ppgtt->vm.foreach = gen8_ppgtt_foreach;

	ppgtt->vm.pte_encode = gen8_pte_encode;

	if (intel_vgpu_active(gt->i915))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

	return ppgtt;

err_free_pd:
	__gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
			     gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
err_free_scratch:
	free_scratch(&ppgtt->vm);
err_free:
	kfree(ppgtt);
	return ERR_PTR(err);
}