// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt.h"

/* Write the PDE for page table @pt at index @pde into the page directory */
static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
}

void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);

	for_each_engine(engine, gt, id) {
		/* GFX_MODE is per-ring on gen7+ */
		ENGINE_WRITE(engine,
			     RING_MODE_GEN7,
			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0].encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDEs on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = kmap_atomic_px(pt);
		memset32(vaddr + pte, scratch_pte, count);
		kunmap_atomic(vaddr);

		pte = 0;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);

	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
	do {
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}

		if (++act_pte == GEN6_PTES) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
	ioread32(ppgtt->pd_addr + pde - 1);
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt, *alloc = NULL;
	intel_wakeref_t wakeref;
	u64 from = start;
	unsigned int pde;
	int ret = 0;

	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (px_base(pt) == px_base(&vm->scratch[1])) {
			spin_unlock(&pd->lock);

			pt = fetch_and_zero(&alloc);
			if (!pt)
				pt = alloc_pt(vm);
			if (IS_ERR(pt)) {
				ret = PTR_ERR(pt);
				goto unwind_out;
			}

			fill32_px(pt, vm->scratch[0].encode);

			spin_lock(&pd->lock);
			if (pd->entry[pde] == &vm->scratch[1]) {
				pd->entry[pde] = pt;
			} else {
				alloc = pt;
				pt = pd->entry[pde];
			}
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
		gen6_flush_pd(ppgtt, from, start);

	goto out;

unwind_out:
	gen6_ppgtt_clear_range(vm, from, start - from);
out:
	if (alloc)
		free_px(vm, alloc);
	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
	return ret;
}

static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	int ret;

	ret = setup_scratch_page(vm, __GFP_HIGHMEM);
	if (ret)
		return ret;

	vm->scratch[0].encode =
		vm->pte_encode(px_dma(&vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
		cleanup_scratch_page(vm);
		return -ENOMEM;
	}

	fill32_px(&vm->scratch[1], vm->scratch[0].encode);
	memset_p(pd->entry, &vm->scratch[1], I915_PDES);

	return 0;
}

static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (px_base(pt) != scratch)
			free_px(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	__i915_vma_put(ppgtt->vma);

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt->base.pd);
}

static int pd_vma_set_pages(struct i915_vma *vma)
{
	vma->pages = ERR_PTR(-ENODEV);
	return 0;
}

static void pd_vma_clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	vma->pages = NULL;
}

static int pd_vma_bind(struct i915_vma *vma,
		       enum i915_cache_level cache_level,
		       u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
	return 0;
}

static void pd_vma_unbind(struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_dma * const scratch =
		px_base(&ppgtt->base.vm.scratch[1]);
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (px_base(pt) == scratch || atomic_read(&pt->used))
			continue;

		free_px(&ppgtt->base.vm, pt);
		pd->entry[pde] = scratch;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.set_pages = pd_vma_set_pages,
	.clear_pages = pd_vma_clear_pages,
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};

static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
	struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_vma *vma;

	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(size > ggtt->vm.total);

	vma = i915_vma_alloc();
	if (!vma)
		return ERR_PTR(-ENOMEM);

	i915_active_init(&vma->active, NULL, NULL);

	kref_init(&vma->ref);
	mutex_init(&vma->pages_mutex);
	vma->vm = i915_vm_get(&ggtt->vm);
	vma->ops = &pd_vma_ops;
	vma->private = ppgtt;

	vma->size = size;
	vma->fence_size = size;
	atomic_set(&vma->flags, I915_VMA_GGTT);
	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */

	INIT_LIST_HEAD(&vma->obj_link);
	INIT_LIST_HEAD(&vma->closed_link);

	return vma;
}

int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Work around the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	if (mutex_lock_interruptible(&ppgtt->pin_mutex))
		return -EINTR;

	/*
	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	err = 0;
	if (!atomic_read(&ppgtt->pin_count))
		err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
	if (!err)
		atomic_inc(&ppgtt->pin_count);
	mutex_unlock(&ppgtt->pin_mutex);

	return err;
}

void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}

void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	if (!atomic_read(&ppgtt->pin_count))
		return;

	i915_vma_unpin(ppgtt->vma);
	atomic_set(&ppgtt->pin_count, 0);
}

struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
	struct i915_ggtt * const ggtt = gt->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	mutex_init(&ppgtt->flush);
	mutex_init(&ppgtt->pin_mutex);

	ppgtt_init(&ppgtt->base, gt);
	ppgtt->base.vm.top = 1;

	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
	if (!ppgtt->base.pd) {
		err = -ENOMEM;
		goto err_free;
	}

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_pd;

	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_pd:
	kfree(ppgtt->base.pd);
err_free:
	mutex_destroy(&ppgtt->pin_mutex);
	kfree(ppgtt);
	return ERR_PTR(err);
}
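
/*
 * Illustrative sketch only, not compiled into the driver: a minimal example
 * of how the exported entry points above compose for a caller that owns a
 * gen6 ppgtt. The helper name example_gen6_ppgtt_get() is hypothetical; the
 * real call sites live elsewhere in the driver and carry more error handling.
 */
#if 0
static struct i915_ppgtt *example_gen6_ppgtt_get(struct intel_gt *gt)
{
	struct i915_ppgtt *ppgtt;
	int err;

	/* Allocate the vm, its page directory and scratch pages */
	ppgtt = gen6_ppgtt_create(gt);
	if (IS_ERR(ppgtt))
		return ppgtt;

	/* Reserve the GGTT slot for the page directory before first use */
	err = gen6_ppgtt_pin(ppgtt);
	if (err) {
		i915_vm_put(&ppgtt->vm);
		return ERR_PTR(err);
	}

	/* ...use the vm; release with gen6_ppgtt_unpin() and i915_vm_put() */
	return ppgtt;
}
#endif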