xref: /openbmc/linux/drivers/gpu/drm/i915/gt/gen6_ppgtt.c (revision 301306a9)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_gt_regs.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"

/* Write pde (index) from the page directory @pd to the page table @pt */
static void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
			   const unsigned int pde,
			   const struct i915_page_table *pt)
{
	dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);

	/* Caller needs to make sure the write completes if necessary */
	iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
		  ppgtt->pd_addr + pde);
}

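/*
 * Global setup for gen7 (IVB/HSW) PPGTT: select 64B PPGTT cachelines via
 * GAC_ECO_BITS and configure page-walk caching in GAM_ECOCHK (write-back
 * on Haswell, LLC on Ivybridge with the GFDT bit cleared).
 */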
void gen7_ppgtt_enable(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 ecochk;

	intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

	ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
	if (IS_HASWELL(i915)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
}

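/*
 * Global setup for gen6 (SNB) PPGTT: program the ECO/ECOCHK cache bits,
 * allow the GPU to continue after a pagefault, and finally set
 * GFX_PPGTT_ENABLE unless PPGTT has been disabled (e.g. for VT-d).
 */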
void gen6_ppgtt_enable(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_rmw(uncore,
			 GAC_ECO_BITS,
			 0,
			 ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

	intel_uncore_rmw(uncore,
			 GAB_CTL,
			 0,
			 GAB_CTL_CONT_AFTER_PAGEFAULT);

	intel_uncore_rmw(uncore,
			 GAM_ECOCHK,
			 0,
			 ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
		intel_uncore_write(uncore,
				   GFX_MODE,
				   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
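/*
 * Return every PTE in the range to the scratch page and drop the page
 * table use counts; tables that reach zero are only flagged here
 * (scan_for_unused_pt) and reclaimed later in pd_vma_unbind.
 */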
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   u64 start, u64 length)
{
	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
	const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

	while (num_entries) {
		struct i915_page_table * const pt =
			i915_pt_entry(ppgtt->base.pd, pde++);
		const unsigned int count = min(num_entries, GEN6_PTES - pte);
		gen6_pte_t *vaddr;

		num_entries -= count;

		GEM_BUG_ON(count > atomic_read(&pt->used));
		if (!atomic_sub_return(count, &pt->used))
			ppgtt->scan_for_unused_pt = true;

		/*
		 * Note that the hw doesn't support removing PDEs on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */

		vaddr = px_vaddr(pt);
		memset32(vaddr + pte, scratch_pte, count);

		pte = 0;
	}
}

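/*
 * Write the PTEs for @vma, walking its backing scatterlist one 4K page
 * at a time. The page tables covering the range must already have been
 * allocated via gen6_alloc_va_range.
 */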
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_directory * const pd = ppgtt->pd;
	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
	unsigned int act_pt = first_entry / GEN6_PTES;
	unsigned int act_pte = first_entry % GEN6_PTES;
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	GEM_BUG_ON(!pd->entry[act_pt]);

	vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
	do {
		GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

		iter.dma += I915_GTT_PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg || sg_dma_len(iter.sg) == 0)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + sg_dma_len(iter.sg);
		}

		if (++act_pte == GEN6_PTES) {
			vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
			act_pte = 0;
		}
	} while (1);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}

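/*
 * Rewrite the PDEs covering [start, end) into the GGTT-resident page
 * directory and invalidate the GGTT so the hardware sees the update.
 */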
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	start = round_down(start, SZ_64K);
	end = round_up(end, SZ_64K) - start;

	mutex_lock(&ppgtt->flush);

	gen6_for_each_pde(pt, pd, start, end, pde)
		gen6_write_pde(ppgtt, pde, pt);

	mb();
	ioread32(ppgtt->pd_addr + pde - 1);
	gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
	mb();

	mutex_unlock(&ppgtt->flush);
}

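/*
 * Ensure page tables back the range [start, start + length): any missing
 * PDE is populated from the preallocated stash, filled with scratch PTEs
 * and accounted in pt->used. Newly installed PDEs are flushed to the
 * hardware if the page directory is already bound in the GGTT.
 */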
static void gen6_alloc_va_range(struct i915_address_space *vm,
				struct i915_vm_pt_stash *stash,
				u64 start, u64 length)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	bool flush = false;
	u64 from = start;
	unsigned int pde;

	spin_lock(&pd->lock);
	gen6_for_each_pde(pt, pd, start, length, pde) {
		const unsigned int count = gen6_pte_count(start, length);

		if (!pt) {
			spin_unlock(&pd->lock);

			pt = stash->pt[0];
			__i915_gem_object_pin_pages(pt->base);

			fill32_px(pt, vm->scratch[0]->encode);

			spin_lock(&pd->lock);
			if (!pd->entry[pde]) {
				stash->pt[0] = pt->stash;
				atomic_set(&pt->used, 0);
				pd->entry[pde] = pt;
			} else {
				pt = pd->entry[pde];
			}

			flush = true;
		}

		atomic_add(count, &pt->used);
	}
	spin_unlock(&pd->lock);

	if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
			gen6_flush_pd(ppgtt, from, start);
	}
}

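/*
 * Create the scratch page (scratch[0]) and a scratch page table
 * (scratch[1]) whose PTEs all point at it; unallocated PDEs are pointed
 * at the scratch page table by gen6_write_pde().
 */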
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
	struct i915_address_space * const vm = &ppgtt->base.vm;
	int ret;

	ret = setup_scratch_page(vm);
	if (ret)
		return ret;

	vm->scratch[0]->encode =
		vm->pte_encode(px_dma(vm->scratch[0]),
			       I915_CACHE_NONE, PTE_READ_ONLY);

	vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(vm->scratch[1])) {
		ret = PTR_ERR(vm->scratch[1]);
		goto err_scratch0;
	}

	ret = map_pt_dma(vm, vm->scratch[1]);
	if (ret)
		goto err_scratch1;

	fill32_px(vm->scratch[1], vm->scratch[0]->encode);

	return 0;

err_scratch1:
	i915_gem_object_put(vm->scratch[1]);
err_scratch0:
	i915_gem_object_put(vm->scratch[0]);
	return ret;
}

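/* Release every page table still installed in the page directory. */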
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	u32 pde;

	gen6_for_all_pdes(pt, pd, pde)
		if (pt)
			free_pt(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

	gen6_ppgtt_free_pd(ppgtt);
	free_scratch(vm);

	mutex_destroy(&ppgtt->flush);

	free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}

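/*
 * Called when the page-directory vma is bound into the GGTT: stash the
 * encoded directory offset in pp_dir, compute the iomem address of the
 * PDEs within the GSM, and write out all PDEs for the address space.
 */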
static void pd_vma_bind(struct i915_address_space *vm,
			struct i915_vm_pt_stash *stash,
			struct i915_vma *vma,
			enum i915_cache_level cache_level,
			u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	struct gen6_ppgtt *ppgtt = vma->private;
	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;

	ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}

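/*
 * On unbind, reclaim any page tables whose use count dropped to zero
 * while the directory was bound (see scan_for_unused_pt).
 */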
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
	struct gen6_ppgtt *ppgtt = vma->private;
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	if (!ppgtt->scan_for_unused_pt)
		return;

	/* Free all no longer used page tables */
	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
		if (!pt || atomic_read(&pt->used))
			continue;

		free_pt(&ppgtt->base.vm, pt);
		pd->entry[pde] = NULL;
	}

	ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
	.bind_vma = pd_vma_bind,
	.unbind_vma = pd_vma_unbind,
};

int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
	int err;

	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));

	/*
	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
	 * which will be pinned into every active context.
	 * (When vma->pin_count becomes atomic, I expect we will naturally
	 * need a larger, unpacked, type and kill this redundancy.)
	 */
	if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
		return 0;

	/* grab the ppgtt resv to pin the object */
	err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
	if (err)
		return err;

	/*
	 * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	if (!atomic_read(&ppgtt->pin_count)) {
		err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);

		GEM_BUG_ON(ppgtt->vma->fence);
		clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
	}
	if (!err)
		atomic_inc(&ppgtt->pin_count);

	return err;
}

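/*
 * The page directory needs no backing storage of its own: its vma only
 * reserves the stretch of GGTT address space whose PTE slots in the GSM
 * are reused as the gen6 PDEs (see pd_vma_bind). Back it with a dummy
 * object whose get/put_pages callbacks are no-ops.
 */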
static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
{
	obj->mm.pages = ZERO_SIZE_PTR;
	return 0;
}

static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
				   struct sg_table *pages)
{
}

static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
	.name = "pd_dummy_obj",
	.get_pages = pd_dummy_obj_get_pages,
	.put_pages = pd_dummy_obj_put_pages,
};

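/*
 * Allocate the top-level page directory together with the dummy object
 * and GGTT vma that will carry its PDEs, hooked up to pd_vma_ops.
 */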
static struct i915_page_directory *
gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
{
	struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
	struct i915_page_directory *pd;
	int err;

	pd = __alloc_pd(I915_PDES);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
							&pd_dummy_obj_ops,
							I915_PDES * SZ_4K);
	if (IS_ERR(pd->pt.base)) {
		err = PTR_ERR(pd->pt.base);
		pd->pt.base = NULL;
		goto err_pd;
	}

	pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
	pd->pt.base->shares_resv_from = &ppgtt->base.vm;

	ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
	if (IS_ERR(ppgtt->vma)) {
		err = PTR_ERR(ppgtt->vma);
		ppgtt->vma = NULL;
		goto err_pd;
	}

	/* The dummy object we create is special; override its ops. */
	ppgtt->vma->ops = &pd_vma_ops;
	ppgtt->vma->private = ppgtt;
	return pd;

err_pd:
	free_pd(&ppgtt->base.vm, pd);
	return ERR_PTR(err);
}

void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

	GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
	if (atomic_dec_and_test(&ppgtt->pin_count))
		i915_vma_unpin(ppgtt->vma);
}

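/*
 * Create a gen6/gen7 ppgtt: a single page directory held in the GGTT,
 * with each PDE covering 4M of address space and the PTE encoding
 * shared with the GGTT.
 */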
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
	struct i915_ggtt * const ggtt = gt->ggtt;
	struct gen6_ppgtt *ppgtt;
	int err;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	mutex_init(&ppgtt->flush);

	ppgtt_init(&ppgtt->base, gt, 0);
	ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
	ppgtt->base.vm.top = 1;

	ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
	ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

	ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
	ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

	err = gen6_ppgtt_init_scratch(ppgtt);
	if (err)
		goto err_free;

	ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
	if (IS_ERR(ppgtt->base.pd)) {
		err = PTR_ERR(ppgtt->base.pd);
		goto err_scratch;
	}

	return &ppgtt->base;

err_scratch:
	free_scratch(&ppgtt->base.vm);
err_free:
	kfree(ppgtt);
	return ERR_PTR(err);
}
465