xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_migrate.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1cf586021SChris Wilson // SPDX-License-Identifier: MIT
2cf586021SChris Wilson /*
3cf586021SChris Wilson  * Copyright © 2020 Intel Corporation
4cf586021SChris Wilson  */
5cf586021SChris Wilson 
6cf586021SChris Wilson #include "i915_drv.h"
7cf586021SChris Wilson #include "intel_context.h"
8cf586021SChris Wilson #include "intel_gpu_commands.h"
9cf586021SChris Wilson #include "intel_gt.h"
10cf586021SChris Wilson #include "intel_gtt.h"
11cf586021SChris Wilson #include "intel_migrate.h"
12cf586021SChris Wilson #include "intel_ring.h"
13d09aa852SJani Nikula #include "gem/i915_gem_lmem.h"
14cf586021SChris Wilson 
15cf586021SChris Wilson struct insert_pte_data {
16cf586021SChris Wilson 	u64 offset;
17cf586021SChris Wilson };
18cf586021SChris Wilson 
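/*
 * Blits run with arbitration disabled for the length of a chunk (see
 * emit_no_arbitration()), so an 8M chunk bounds the non-preemptible window
 * to roughly 8M / 8GiB/s ~= 1ms.
 */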
19cf586021SChris Wilson #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
20cf586021SChris Wilson 
2148760ffeSRamalingam C #define GET_CCS_BYTES(i915, size)	(HAS_FLAT_CCS(i915) ? \
2248760ffeSRamalingam C 					 DIV_ROUND_UP(size, NUM_BYTES_PER_CCS_BYTE) : 0)
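
/*
 * CCS metadata is 1/256th of the main surface size (see the Flat-CCS DOC
 * comment below), so one CHUNK_SZ (8M) of lmem carries at most 32K of CCS.
 */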
23cf586021SChris Wilson static bool engine_supports_migration(struct intel_engine_cs *engine)
24cf586021SChris Wilson {
25cf586021SChris Wilson 	if (!engine)
26cf586021SChris Wilson 		return false;
27cf586021SChris Wilson 
28cf586021SChris Wilson 	/*
29cf586021SChris Wilson 	 * We need the ability to prevent arbitration (MI_ARB_ON_OFF),
30cf586021SChris Wilson 	 * the ability to write PTEs using inline data (MI_STORE_DATA_IMM)
31cf586021SChris Wilson 	 * and of course the ability to do the block transfer (blits).
32cf586021SChris Wilson 	 */
33cf586021SChris Wilson 	GEM_BUG_ON(engine->class != COPY_ENGINE_CLASS);
34cf586021SChris Wilson 
35cf586021SChris Wilson 	return true;
36cf586021SChris Wilson }
37cf586021SChris Wilson 
3800e27ad8SMatthew Auld static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
3900e27ad8SMatthew Auld 				struct i915_page_table *pt,
4000e27ad8SMatthew Auld 				void *data)
4100e27ad8SMatthew Auld {
4200e27ad8SMatthew Auld 	struct insert_pte_data *d = data;
4300e27ad8SMatthew Auld 
4400e27ad8SMatthew Auld 	/*
4500e27ad8SMatthew Auld 	 * Insert a dummy PTE into every PT that will map to LMEM to ensure
4600e27ad8SMatthew Auld 	 * we have a correctly set-up PDE structure for later use.
4700e27ad8SMatthew Auld 	 */
489275277dSFei Yang 	vm->insert_page(vm, 0, d->offset,
499275277dSFei Yang 			i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
509275277dSFei Yang 			PTE_LM);
5100e27ad8SMatthew Auld 	GEM_BUG_ON(!pt->is_compact);
5200e27ad8SMatthew Auld 	d->offset += SZ_2M;
5300e27ad8SMatthew Auld }
5400e27ad8SMatthew Auld 
5500e27ad8SMatthew Auld static void xehpsdv_insert_pte(struct i915_address_space *vm,
5600e27ad8SMatthew Auld 			       struct i915_page_table *pt,
5700e27ad8SMatthew Auld 			       void *data)
5800e27ad8SMatthew Auld {
5900e27ad8SMatthew Auld 	struct insert_pte_data *d = data;
6000e27ad8SMatthew Auld 
6100e27ad8SMatthew Auld 	/*
6200e27ad8SMatthew Auld 	 * We are playing tricks here, since the actual pt, from the hw
6300e27ad8SMatthew Auld 	 * pov, is only 256 bytes with 32 entries, or 4096 bytes with 512
6400e27ad8SMatthew Auld 	 * entries, but we are still guaranteed that the physical
6500e27ad8SMatthew Auld 	 * alignment is 64K underneath for the pt, and we are careful
6600e27ad8SMatthew Auld 	 * not to access the space in the void.
6700e27ad8SMatthew Auld 	 */
689275277dSFei Yang 	vm->insert_page(vm, px_dma(pt), d->offset,
699275277dSFei Yang 			i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
709275277dSFei Yang 			PTE_LM);
7100e27ad8SMatthew Auld 	d->offset += SZ_64K;
7200e27ad8SMatthew Auld }
7300e27ad8SMatthew Auld 
74cf586021SChris Wilson static void insert_pte(struct i915_address_space *vm,
75cf586021SChris Wilson 		       struct i915_page_table *pt,
76cf586021SChris Wilson 		       void *data)
77cf586021SChris Wilson {
78cf586021SChris Wilson 	struct insert_pte_data *d = data;
79cf586021SChris Wilson 
809275277dSFei Yang 	vm->insert_page(vm, px_dma(pt), d->offset,
819275277dSFei Yang 			i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
828eb7fcceSMatthew Auld 			i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
83cf586021SChris Wilson 	d->offset += PAGE_SIZE;
84cf586021SChris Wilson }
85cf586021SChris Wilson 
86cf586021SChris Wilson static struct i915_address_space *migrate_vm(struct intel_gt *gt)
87cf586021SChris Wilson {
88cf586021SChris Wilson 	struct i915_vm_pt_stash stash = {};
89cf586021SChris Wilson 	struct i915_ppgtt *vm;
90cf586021SChris Wilson 	int err;
91cf586021SChris Wilson 	int i;
92cf586021SChris Wilson 
93cf586021SChris Wilson 	/*
94cf586021SChris Wilson 	 * We construct a very special VM for use by all migration contexts,
95cf586021SChris Wilson 	 * it is kept pinned so that it can be used at any time. As we need
96cf586021SChris Wilson 	 * to pre-allocate the page directories for the migration VM, this
97cf586021SChris Wilson 	 * limits us to only using a small number of prepared vma.
98cf586021SChris Wilson 	 *
99cf586021SChris Wilson 	 * To be able to pipeline and reschedule migration operations while
100cf586021SChris Wilson 	 * avoiding unnecessary contention on the vm itself, the PTE updates
101cf586021SChris Wilson 	 * are inline with the blits. All the blits use the same fixed
102cf586021SChris Wilson 	 * addresses, with the backing store redirection being updated on the
103cf586021SChris Wilson 	 * fly. Only 2 implicit vma are used for all migration operations.
104cf586021SChris Wilson 	 *
105cf586021SChris Wilson 	 * We lay the ppGTT out as:
106cf586021SChris Wilson 	 *
107cf586021SChris Wilson 	 *	[0, CHUNK_SZ) -> first object
108cf586021SChris Wilson 	 *	[CHUNK_SZ, 2 * CHUNK_SZ) -> second object
109cf586021SChris Wilson 	 *	[2 * CHUNK_SZ, 2 * CHUNK_SZ + 2 * CHUNK_SZ >> 9] -> PTE
110cf586021SChris Wilson 	 *
111cf586021SChris Wilson 	 * By exposing the dma addresses of the page directories themselves
112cf586021SChris Wilson 	 * within the ppGTT, we are then able to rewrite the PTE prior to use.
113cf586021SChris Wilson 	 * But the PTE update and subsequent migration operation must be atomic,
114cf586021SChris Wilson 	 * i.e. within the same non-preemptible window so that we do not switch
115cf586021SChris Wilson 	 * to another migration context that overwrites the PTE.
116cf586021SChris Wilson 	 *
11700e27ad8SMatthew Auld 	 * This changes quite a bit on platforms with HAS_64K_PAGES support,
11800e27ad8SMatthew Auld 	 * where we instead have three windows, each CHUNK_SZ in size. The
11900e27ad8SMatthew Auld 	 * first is reserved for mapping system-memory, and that just uses the
12000e27ad8SMatthew Auld 	 * 512 entry layout using 4K GTT pages. The other two windows just map
12100e27ad8SMatthew Auld 	 * lmem pages and must use the new compact 32 entry layout using 64K GTT
12200e27ad8SMatthew Auld 	 * pages, which ensures we can address any lmem object that the user
12300e27ad8SMatthew Auld 	 * throws at us. We then also use xehpsdv_toggle_pdes as a way of
12400e27ad8SMatthew Auld 	 * just toggling the PDE bit (GEN12_PDE_64K) for us, to enable the
12500e27ad8SMatthew Auld 	 * compact layout for each of these page-tables that fall within the
12600e27ad8SMatthew Auld 	 * [CHUNK_SZ, 3 * CHUNK_SZ) range.
12700e27ad8SMatthew Auld 	 *
12800e27ad8SMatthew Auld 	 * We lay the ppGTT out as:
12900e27ad8SMatthew Auld 	 *
13000e27ad8SMatthew Auld 	 * [0, CHUNK_SZ) -> first window/object, maps smem
13100e27ad8SMatthew Auld 	 * [CHUNK_SZ, 2 * CHUNK_SZ) -> second window/object, maps lmem src
13200e27ad8SMatthew Auld 	 * [2 * CHUNK_SZ, 3 * CHUNK_SZ) -> third window/object, maps lmem dst
13300e27ad8SMatthew Auld 	 *
13400e27ad8SMatthew Auld 	 * For the PTE window it's also quite different, since each PTE must
13500e27ad8SMatthew Auld 	 * point to some 64K page, one for each PT (since it's in lmem), and yet
13600e27ad8SMatthew Auld 	 * each is only <= 4096 bytes, but since the unused space within that PTE
13700e27ad8SMatthew Auld 	 * range is never touched, this should be fine.
13800e27ad8SMatthew Auld 	 *
13900e27ad8SMatthew Auld 	 * So basically each PT now needs 64K of virtual memory, instead of 4K,
14000e27ad8SMatthew Auld 	 * which looks like:
14100e27ad8SMatthew Auld 	 *
14200e27ad8SMatthew Auld 	 * [3 * CHUNK_SZ, 3 * CHUNK_SZ + ((3 * CHUNK_SZ / SZ_2M) * SZ_64K)] -> PTE
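	 *
	 * Concretely, with CHUNK_SZ = 8M the PTE window above is
	 * (3 * 8M / 2M) * 64K = 768K, whereas the legacy PTE window is only
	 * (2 * 8M >> 12) * sizeof(u64) = 32K.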
143cf586021SChris Wilson 	 */
144cf586021SChris Wilson 
145a259cc14SThomas Hellström 	vm = i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY);
146cf586021SChris Wilson 	if (IS_ERR(vm))
147cf586021SChris Wilson 		return ERR_CAST(vm);
148cf586021SChris Wilson 
149cf586021SChris Wilson 	if (!vm->vm.allocate_va_range || !vm->vm.foreach) {
150cf586021SChris Wilson 		err = -ENODEV;
151cf586021SChris Wilson 		goto err_vm;
152cf586021SChris Wilson 	}
153cf586021SChris Wilson 
15400e27ad8SMatthew Auld 	if (HAS_64K_PAGES(gt->i915))
15500e27ad8SMatthew Auld 		stash.pt_sz = I915_GTT_PAGE_SIZE_64K;
15600e27ad8SMatthew Auld 
157cf586021SChris Wilson 	/*
158cf586021SChris Wilson 	 * Each engine instance is assigned its own chunk in the VM, so
159cf586021SChris Wilson 	 * that we can run multiple instances concurrently
160cf586021SChris Wilson 	 */
161cf586021SChris Wilson 	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
162cf586021SChris Wilson 		struct intel_engine_cs *engine;
163cf586021SChris Wilson 		u64 base = (u64)i << 32;
164cf586021SChris Wilson 		struct insert_pte_data d = {};
165cf586021SChris Wilson 		struct i915_gem_ww_ctx ww;
166cf586021SChris Wilson 		u64 sz;
167cf586021SChris Wilson 
168cf586021SChris Wilson 		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
169cf586021SChris Wilson 		if (!engine_supports_migration(engine))
170cf586021SChris Wilson 			continue;
171cf586021SChris Wilson 
172cf586021SChris Wilson 		/*
173cf586021SChris Wilson 		 * We copy in 8MiB chunks. Each PDE covers 2MiB, so we need
174cf586021SChris Wilson 		 * 4x2 page directories for source/destination.
175cf586021SChris Wilson 		 */
17600e27ad8SMatthew Auld 		if (HAS_64K_PAGES(gt->i915))
17700e27ad8SMatthew Auld 			sz = 3 * CHUNK_SZ;
17800e27ad8SMatthew Auld 		else
179cf586021SChris Wilson 			sz = 2 * CHUNK_SZ;
180cf586021SChris Wilson 		d.offset = base + sz;
181cf586021SChris Wilson 
182cf586021SChris Wilson 		/*
183cf586021SChris Wilson 		 * We need another page directory setup so that we can write
184cf586021SChris Wilson 		 * the 8x512 PTE in each chunk.
185cf586021SChris Wilson 		 */
18600e27ad8SMatthew Auld 		if (HAS_64K_PAGES(gt->i915))
18700e27ad8SMatthew Auld 			sz += (sz / SZ_2M) * SZ_64K;
18800e27ad8SMatthew Auld 		else
189cf586021SChris Wilson 			sz += (sz >> 12) * sizeof(u64);
190cf586021SChris Wilson 
191cf586021SChris Wilson 		err = i915_vm_alloc_pt_stash(&vm->vm, &stash, sz);
192cf586021SChris Wilson 		if (err)
193cf586021SChris Wilson 			goto err_vm;
194cf586021SChris Wilson 
195cf586021SChris Wilson 		for_i915_gem_ww(&ww, err, true) {
196cf586021SChris Wilson 			err = i915_vm_lock_objects(&vm->vm, &ww);
197cf586021SChris Wilson 			if (err)
198cf586021SChris Wilson 				continue;
199cf586021SChris Wilson 			err = i915_vm_map_pt_stash(&vm->vm, &stash);
200cf586021SChris Wilson 			if (err)
201cf586021SChris Wilson 				continue;
202cf586021SChris Wilson 
203cf586021SChris Wilson 			vm->vm.allocate_va_range(&vm->vm, &stash, base, sz);
204cf586021SChris Wilson 		}
205cf586021SChris Wilson 		i915_vm_free_pt_stash(&vm->vm, &stash);
206cf586021SChris Wilson 		if (err)
207cf586021SChris Wilson 			goto err_vm;
208cf586021SChris Wilson 
209cf586021SChris Wilson 		/* Now allow the GPU to rewrite the PTE via its own ppGTT */
21000e27ad8SMatthew Auld 		if (HAS_64K_PAGES(gt->i915)) {
21100e27ad8SMatthew Auld 			vm->vm.foreach(&vm->vm, base, d.offset - base,
21200e27ad8SMatthew Auld 				       xehpsdv_insert_pte, &d);
21300e27ad8SMatthew Auld 			d.offset = base + CHUNK_SZ;
21400e27ad8SMatthew Auld 			vm->vm.foreach(&vm->vm,
21500e27ad8SMatthew Auld 				       d.offset,
21600e27ad8SMatthew Auld 				       2 * CHUNK_SZ,
21700e27ad8SMatthew Auld 				       xehpsdv_toggle_pdes, &d);
21800e27ad8SMatthew Auld 		} else {
21900e27ad8SMatthew Auld 			vm->vm.foreach(&vm->vm, base, d.offset - base,
22000e27ad8SMatthew Auld 				       insert_pte, &d);
22100e27ad8SMatthew Auld 		}
222cf586021SChris Wilson 	}
223cf586021SChris Wilson 
224cf586021SChris Wilson 	return &vm->vm;
225cf586021SChris Wilson 
226cf586021SChris Wilson err_vm:
227cf586021SChris Wilson 	i915_vm_put(&vm->vm);
228cf586021SChris Wilson 	return ERR_PTR(err);
229cf586021SChris Wilson }
230cf586021SChris Wilson 
231cf586021SChris Wilson static struct intel_engine_cs *first_copy_engine(struct intel_gt *gt)
232cf586021SChris Wilson {
233cf586021SChris Wilson 	struct intel_engine_cs *engine;
234cf586021SChris Wilson 	int i;
235cf586021SChris Wilson 
236cf586021SChris Wilson 	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
237cf586021SChris Wilson 		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
238cf586021SChris Wilson 		if (engine_supports_migration(engine))
239cf586021SChris Wilson 			return engine;
240cf586021SChris Wilson 	}
241cf586021SChris Wilson 
242cf586021SChris Wilson 	return NULL;
243cf586021SChris Wilson }
244cf586021SChris Wilson 
245cf586021SChris Wilson static struct intel_context *pinned_context(struct intel_gt *gt)
246cf586021SChris Wilson {
247cf586021SChris Wilson 	static struct lock_class_key key;
248cf586021SChris Wilson 	struct intel_engine_cs *engine;
249cf586021SChris Wilson 	struct i915_address_space *vm;
250cf586021SChris Wilson 	struct intel_context *ce;
251cf586021SChris Wilson 
252cf586021SChris Wilson 	engine = first_copy_engine(gt);
253cf586021SChris Wilson 	if (!engine)
254cf586021SChris Wilson 		return ERR_PTR(-ENODEV);
255cf586021SChris Wilson 
256cf586021SChris Wilson 	vm = migrate_vm(gt);
257cf586021SChris Wilson 	if (IS_ERR(vm))
258cf586021SChris Wilson 		return ERR_CAST(vm);
259cf586021SChris Wilson 
260cf586021SChris Wilson 	ce = intel_engine_create_pinned_context(engine, vm, SZ_512K,
261cf586021SChris Wilson 						I915_GEM_HWS_MIGRATE,
262cf586021SChris Wilson 						&key, "migrate");
263ff12ce2cSDan Carpenter 	i915_vm_put(vm);
264cf586021SChris Wilson 	return ce;
265cf586021SChris Wilson }
266cf586021SChris Wilson 
267cf586021SChris Wilson int intel_migrate_init(struct intel_migrate *m, struct intel_gt *gt)
268cf586021SChris Wilson {
269cf586021SChris Wilson 	struct intel_context *ce;
270cf586021SChris Wilson 
271cf586021SChris Wilson 	memset(m, 0, sizeof(*m));
272cf586021SChris Wilson 
273cf586021SChris Wilson 	ce = pinned_context(gt);
274cf586021SChris Wilson 	if (IS_ERR(ce))
275cf586021SChris Wilson 		return PTR_ERR(ce);
276cf586021SChris Wilson 
277cf586021SChris Wilson 	m->context = ce;
278cf586021SChris Wilson 	return 0;
279cf586021SChris Wilson }
280cf586021SChris Wilson 
281cf586021SChris Wilson static int random_index(unsigned int max)
282cf586021SChris Wilson {
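	/* Take the high 32 bits of a random u32 scaled by max to land in [0, max). */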
283cf586021SChris Wilson 	return upper_32_bits(mul_u32_u32(get_random_u32(), max));
284cf586021SChris Wilson }
285cf586021SChris Wilson 
286cf586021SChris Wilson static struct intel_context *__migrate_engines(struct intel_gt *gt)
287cf586021SChris Wilson {
288cf586021SChris Wilson 	struct intel_engine_cs *engines[MAX_ENGINE_INSTANCE];
289cf586021SChris Wilson 	struct intel_engine_cs *engine;
290cf586021SChris Wilson 	unsigned int count, i;
291cf586021SChris Wilson 
292cf586021SChris Wilson 	count = 0;
293cf586021SChris Wilson 	for (i = 0; i < ARRAY_SIZE(gt->engine_class[COPY_ENGINE_CLASS]); i++) {
294cf586021SChris Wilson 		engine = gt->engine_class[COPY_ENGINE_CLASS][i];
295cf586021SChris Wilson 		if (engine_supports_migration(engine))
296cf586021SChris Wilson 			engines[count++] = engine;
297cf586021SChris Wilson 	}
298cf586021SChris Wilson 
299cf586021SChris Wilson 	return intel_context_create(engines[random_index(count)]);
300cf586021SChris Wilson }
301cf586021SChris Wilson 
302cf586021SChris Wilson struct intel_context *intel_migrate_create_context(struct intel_migrate *m)
303cf586021SChris Wilson {
304cf586021SChris Wilson 	struct intel_context *ce;
305cf586021SChris Wilson 
306cf586021SChris Wilson 	/*
307cf586021SChris Wilson 	 * We randomly distribute contexts across the engines upon construction,
308cf586021SChris Wilson 	 * as they all share the same pinned vm, and so in order to allow
309cf586021SChris Wilson 	 * multiple blits to run in parallel, we must construct each blit
310cf586021SChris Wilson 	 * to use a different range of the vm for its GTT. This has to be
311cf586021SChris Wilson 	 * known at construction, so we cannot use the late greedy load
312cf586021SChris Wilson 	 * balancing of the virtual-engine.
313cf586021SChris Wilson 	 */
314cf586021SChris Wilson 	ce = __migrate_engines(m->context->engine->gt);
315cf586021SChris Wilson 	if (IS_ERR(ce))
316cf586021SChris Wilson 		return ce;
317cf586021SChris Wilson 
31874e4b909SJason Ekstrand 	ce->ring = NULL;
31974e4b909SJason Ekstrand 	ce->ring_size = SZ_256K;
320cf586021SChris Wilson 
321cf586021SChris Wilson 	i915_vm_put(ce->vm);
322cf586021SChris Wilson 	ce->vm = i915_vm_get(m->context->vm);
323cf586021SChris Wilson 
324cf586021SChris Wilson 	return ce;
325cf586021SChris Wilson }
326cf586021SChris Wilson 
327cf586021SChris Wilson static inline struct sgt_dma sg_sgt(struct scatterlist *sg)
328cf586021SChris Wilson {
329cf586021SChris Wilson 	dma_addr_t addr = sg_dma_address(sg);
330cf586021SChris Wilson 
331cf586021SChris Wilson 	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
332cf586021SChris Wilson }
333cf586021SChris Wilson 
334cf586021SChris Wilson static int emit_no_arbitration(struct i915_request *rq)
335cf586021SChris Wilson {
336cf586021SChris Wilson 	u32 *cs;
337cf586021SChris Wilson 
338cf586021SChris Wilson 	cs = intel_ring_begin(rq, 2);
339cf586021SChris Wilson 	if (IS_ERR(cs))
340cf586021SChris Wilson 		return PTR_ERR(cs);
341cf586021SChris Wilson 
342cf586021SChris Wilson 	/* Explicitly disable preemption for this request. */
343cf586021SChris Wilson 	*cs++ = MI_ARB_ON_OFF;
344cf586021SChris Wilson 	*cs++ = MI_NOOP;
345cf586021SChris Wilson 	intel_ring_advance(rq, cs);
346cf586021SChris Wilson 
347cf586021SChris Wilson 	return 0;
348cf586021SChris Wilson }
349cf586021SChris Wilson 
35035168a6cSChris Wilson static int max_pte_pkt_size(struct i915_request *rq, int pkt)
35135168a6cSChris Wilson {
35235168a6cSChris Wilson 	struct intel_ring *ring = rq->ring;
35335168a6cSChris Wilson 
35435168a6cSChris Wilson 	pkt = min_t(int, pkt, (ring->space - rq->reserved_space) / sizeof(u32) + 5);
35535168a6cSChris Wilson 	pkt = min_t(int, pkt, (ring->size - ring->emit) / sizeof(u32) + 5);
35635168a6cSChris Wilson 
35735168a6cSChris Wilson 	return pkt;
35835168a6cSChris Wilson }
35935168a6cSChris Wilson 
360e288e178SMatthew Auld #define I915_EMIT_PTE_NUM_DWORDS 6
361e288e178SMatthew Auld 
362cf586021SChris Wilson static int emit_pte(struct i915_request *rq,
363cf586021SChris Wilson 		    struct sgt_dma *it,
3649275277dSFei Yang 		    unsigned int pat_index,
365cf586021SChris Wilson 		    bool is_lmem,
366cf586021SChris Wilson 		    u64 offset,
367cf586021SChris Wilson 		    int length)
368cf586021SChris Wilson {
369*d3f23ab9SAndrzej Hajda 	bool has_64K_pages = HAS_64K_PAGES(rq->i915);
3709275277dSFei Yang 	const u64 encode = rq->context->vm->pte_encode(0, pat_index,
371cf586021SChris Wilson 						       is_lmem ? PTE_LM : 0);
372cf586021SChris Wilson 	struct intel_ring *ring = rq->ring;
37300e27ad8SMatthew Auld 	int pkt, dword_length;
37400e27ad8SMatthew Auld 	u32 total = 0;
37500e27ad8SMatthew Auld 	u32 page_size;
376cf586021SChris Wilson 	u32 *hdr, *cs;
377cf586021SChris Wilson 
378*d3f23ab9SAndrzej Hajda 	GEM_BUG_ON(GRAPHICS_VER(rq->i915) < 8);
379cf586021SChris Wilson 
38000e27ad8SMatthew Auld 	page_size = I915_GTT_PAGE_SIZE;
38100e27ad8SMatthew Auld 	dword_length = 0x400;
38200e27ad8SMatthew Auld 
383cf586021SChris Wilson 	/* Compute the page directory offset for the target address range */
38400e27ad8SMatthew Auld 	if (has_64K_pages) {
38500e27ad8SMatthew Auld 		GEM_BUG_ON(!IS_ALIGNED(offset, SZ_2M));
38600e27ad8SMatthew Auld 
38700e27ad8SMatthew Auld 		offset /= SZ_2M;
38800e27ad8SMatthew Auld 		offset *= SZ_64K;
38900e27ad8SMatthew Auld 		offset += 3 * CHUNK_SZ;
39000e27ad8SMatthew Auld 
39100e27ad8SMatthew Auld 		if (is_lmem) {
39200e27ad8SMatthew Auld 			page_size = I915_GTT_PAGE_SIZE_64K;
39300e27ad8SMatthew Auld 			dword_length = 0x40;
39400e27ad8SMatthew Auld 		}
39500e27ad8SMatthew Auld 	} else {
396cf586021SChris Wilson 		offset >>= 12;
397cf586021SChris Wilson 		offset *= sizeof(u64);
398cf586021SChris Wilson 		offset += 2 * CHUNK_SZ;
39900e27ad8SMatthew Auld 	}
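	/*
	 * e.g. on legacy platforms a target GTT offset of CHUNK_SZ (the second
	 * window) has its first PTE written at
	 * 2 * CHUNK_SZ + (CHUNK_SZ >> 12) * sizeof(u64) within the PTE window.
	 */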
40000e27ad8SMatthew Auld 
40108c7c122SMatthew Auld 	offset += (u64)rq->engine->instance << 32;
402cf586021SChris Wilson 
403e288e178SMatthew Auld 	cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS);
404cf586021SChris Wilson 	if (IS_ERR(cs))
405cf586021SChris Wilson 		return PTR_ERR(cs);
406cf586021SChris Wilson 
407cf586021SChris Wilson 	/* Pack as many PTE updates as possible into a single MI command */
40835168a6cSChris Wilson 	pkt = max_pte_pkt_size(rq, dword_length);
409cf586021SChris Wilson 
410cf586021SChris Wilson 	hdr = cs;
411cf586021SChris Wilson 	*cs++ = MI_STORE_DATA_IMM | REG_BIT(21); /* as qword elements */
412cf586021SChris Wilson 	*cs++ = lower_32_bits(offset);
413cf586021SChris Wilson 	*cs++ = upper_32_bits(offset);
414cf586021SChris Wilson 
415cf586021SChris Wilson 	do {
416cf586021SChris Wilson 		if (cs - hdr >= pkt) {
41700e27ad8SMatthew Auld 			int dword_rem;
41800e27ad8SMatthew Auld 
419cf586021SChris Wilson 			*hdr += cs - hdr - 2;
420cf586021SChris Wilson 			*cs++ = MI_NOOP;
421cf586021SChris Wilson 
422cf586021SChris Wilson 			ring->emit = (void *)cs - ring->vaddr;
423cf586021SChris Wilson 			intel_ring_advance(rq, cs);
424cf586021SChris Wilson 			intel_ring_update_space(ring);
425cf586021SChris Wilson 
426e288e178SMatthew Auld 			cs = intel_ring_begin(rq, I915_EMIT_PTE_NUM_DWORDS);
427cf586021SChris Wilson 			if (IS_ERR(cs))
428cf586021SChris Wilson 				return PTR_ERR(cs);
429cf586021SChris Wilson 
43000e27ad8SMatthew Auld 			dword_rem = dword_length;
43100e27ad8SMatthew Auld 			if (has_64K_pages) {
43200e27ad8SMatthew Auld 				if (IS_ALIGNED(total, SZ_2M)) {
43300e27ad8SMatthew Auld 					offset = round_up(offset, SZ_64K);
43400e27ad8SMatthew Auld 				} else {
43500e27ad8SMatthew Auld 					dword_rem = SZ_2M - (total & (SZ_2M - 1));
43600e27ad8SMatthew Auld 					dword_rem /= page_size;
43700e27ad8SMatthew Auld 					dword_rem *= 2;
43800e27ad8SMatthew Auld 				}
43900e27ad8SMatthew Auld 			}
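			/*
			 * Each PTE is emitted as a qword (two dwords), so for
			 * 64K lmem pages dword_rem caps the next packet at the
			 * entries left before the following 2M boundary, i.e.
			 * the packet never straddles a compact page-table.
			 */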
44000e27ad8SMatthew Auld 
44135168a6cSChris Wilson 			pkt = max_pte_pkt_size(rq, dword_rem);
442cf586021SChris Wilson 
443cf586021SChris Wilson 			hdr = cs;
444cf586021SChris Wilson 			*cs++ = MI_STORE_DATA_IMM | REG_BIT(21);
445cf586021SChris Wilson 			*cs++ = lower_32_bits(offset);
446cf586021SChris Wilson 			*cs++ = upper_32_bits(offset);
447cf586021SChris Wilson 		}
448cf586021SChris Wilson 
44900e27ad8SMatthew Auld 		GEM_BUG_ON(!IS_ALIGNED(it->dma, page_size));
45000e27ad8SMatthew Auld 
451cf586021SChris Wilson 		*cs++ = lower_32_bits(encode | it->dma);
452cf586021SChris Wilson 		*cs++ = upper_32_bits(encode | it->dma);
453cf586021SChris Wilson 
454cf586021SChris Wilson 		offset += 8;
45500e27ad8SMatthew Auld 		total += page_size;
456cf586021SChris Wilson 
45700e27ad8SMatthew Auld 		it->dma += page_size;
458cf586021SChris Wilson 		if (it->dma >= it->max) {
459cf586021SChris Wilson 			it->sg = __sg_next(it->sg);
460cf586021SChris Wilson 			if (!it->sg || sg_dma_len(it->sg) == 0)
461cf586021SChris Wilson 				break;
462cf586021SChris Wilson 
463cf586021SChris Wilson 			it->dma = sg_dma_address(it->sg);
464cf586021SChris Wilson 			it->max = it->dma + sg_dma_len(it->sg);
465cf586021SChris Wilson 		}
466cf586021SChris Wilson 	} while (total < length);
467cf586021SChris Wilson 
468cf586021SChris Wilson 	*hdr += cs - hdr - 2;
469cf586021SChris Wilson 	*cs++ = MI_NOOP;
470cf586021SChris Wilson 
471cf586021SChris Wilson 	ring->emit = (void *)cs - ring->vaddr;
472cf586021SChris Wilson 	intel_ring_advance(rq, cs);
473cf586021SChris Wilson 	intel_ring_update_space(ring);
474cf586021SChris Wilson 
475cf586021SChris Wilson 	return total;
476cf586021SChris Wilson }
477cf586021SChris Wilson 
4787c517f83SLucas De Marchi static bool wa_1209644611_applies(int ver, u32 size)
479cf586021SChris Wilson {
480cf586021SChris Wilson 	u32 height = size >> PAGE_SHIFT;
481cf586021SChris Wilson 
4827c517f83SLucas De Marchi 	if (ver != 11)
483cf586021SChris Wilson 		return false;
484cf586021SChris Wilson 
485cf586021SChris Wilson 	return height % 4 == 3 && height <= 8;
486cf586021SChris Wilson }
487cf586021SChris Wilson 
48848760ffeSRamalingam C /**
48948760ffeSRamalingam C  * DOC: Flat-CCS - Memory compression for Local memory
49048760ffeSRamalingam C  *
49148760ffeSRamalingam C  * On Xe-HP and later devices, we use dedicated compression control state (CCS)
49248760ffeSRamalingam C  * stored in local memory for each surface, to support the 3D and media
49348760ffeSRamalingam C  * compression formats.
49448760ffeSRamalingam C  *
49548760ffeSRamalingam C  * The memory required for the CCS of the entire local memory is 1/256 of the
49648760ffeSRamalingam C  * local memory size. So before the kernel boots, the required memory is reserved
49748760ffeSRamalingam C  * for the CCS data and a secure register will be programmed with the CCS base
49848760ffeSRamalingam C  * address.
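 * For example, a device with 16G of local memory reserves 64M for CCS.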
49948760ffeSRamalingam C  *
50048760ffeSRamalingam C  * Flat CCS data needs to be cleared when an lmem object is allocated.
50148760ffeSRamalingam C  * And CCS data can be copied in and out of the CCS region through
50248760ffeSRamalingam C  * XY_CTRL_SURF_COPY_BLT. The CPU can't access the CCS data directly.
50348760ffeSRamalingam C  *
5046e29832fSRamalingam C  * I915 supports Flat-CCS on lmem-only objects. When an object has smem in
5056e29832fSRamalingam C  * its preference list, on memory pressure, i915 needs to migrate the lmem
5066e29832fSRamalingam C  * content into smem. If the lmem object is Flat-CCS compressed by userspace,
5076e29832fSRamalingam C  * then i915 needs to decompress it. But i915 lacks the required information
5086e29832fSRamalingam C  * for such decompression. Hence I915 supports Flat-CCS only on lmem-only objects.
50948760ffeSRamalingam C  *
5106e29832fSRamalingam C  * When we exhaust the lmem, Flat-CCS capable objects' lmem backing memory can
5116e29832fSRamalingam C  * be temporarily evicted to smem, along with the auxiliary CCS state, where
5126e29832fSRamalingam C  * it can be potentially swapped-out at a later point, if required.
5136e29832fSRamalingam C  * If userspace later touches the evicted pages, then we always move
5146e29832fSRamalingam C  * the backing memory back to lmem, which includes restoring the saved CCS state,
5156e29832fSRamalingam C  * and potentially performing any required swap-in.
5166e29832fSRamalingam C  *
5176e29832fSRamalingam C  * For the migration of lmem objects with smem in their placement list, such as
5186e29832fSRamalingam C  * {lmem, smem}, the objects are treated as non Flat-CCS capable objects.
51948760ffeSRamalingam C  */
52048760ffeSRamalingam C 
52148760ffeSRamalingam C static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
52248760ffeSRamalingam C {
52348760ffeSRamalingam C 	*cmd++ = MI_FLUSH_DW | flags;
52448760ffeSRamalingam C 	*cmd++ = 0;
52548760ffeSRamalingam C 	*cmd++ = 0;
52648760ffeSRamalingam C 
52748760ffeSRamalingam C 	return cmd;
52848760ffeSRamalingam C }
52948760ffeSRamalingam C 
53048760ffeSRamalingam C static int emit_copy_ccs(struct i915_request *rq,
53148760ffeSRamalingam C 			 u32 dst_offset, u8 dst_access,
53248760ffeSRamalingam C 			 u32 src_offset, u8 src_access, int size)
53348760ffeSRamalingam C {
534*d3f23ab9SAndrzej Hajda 	struct drm_i915_private *i915 = rq->i915;
53548760ffeSRamalingam C 	int mocs = rq->engine->gt->mocs.uc_index << 1;
536dba4d442SMatthew Auld 	u32 num_ccs_blks;
53748760ffeSRamalingam C 	u32 *cs;
53848760ffeSRamalingam C 
539dba4d442SMatthew Auld 	cs = intel_ring_begin(rq, 12);
54048760ffeSRamalingam C 	if (IS_ERR(cs))
54148760ffeSRamalingam C 		return PTR_ERR(cs);
54248760ffeSRamalingam C 
54348760ffeSRamalingam C 	num_ccs_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
54448760ffeSRamalingam C 				    NUM_CCS_BYTES_PER_BLOCK);
54548760ffeSRamalingam C 	GEM_BUG_ON(num_ccs_blks > NUM_CCS_BLKS_PER_XFER);
54648760ffeSRamalingam C 	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
54748760ffeSRamalingam C 
54848760ffeSRamalingam C 	/*
54948760ffeSRamalingam C 	 * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
55048760ffeSRamalingam C 	 * data in and out of the CCS region.
55148760ffeSRamalingam C 	 *
55248760ffeSRamalingam C 	 * We can copy at most 1024 blocks of 256 bytes using one
55348760ffeSRamalingam C 	 * XY_CTRL_SURF_COPY_BLT instruction.
55448760ffeSRamalingam C 	 *
55548760ffeSRamalingam C 	 * In case we need to copy more than 1024 blocks, we need to add
55648760ffeSRamalingam C 	 * another instruction to the same batch buffer.
55748760ffeSRamalingam C 	 *
55848760ffeSRamalingam C 	 * 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
55948760ffeSRamalingam C 	 *
56048760ffeSRamalingam C 	 * 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM.
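	 *
	 * A CHUNK_SZ (8M) blit therefore needs at most 8M / 256 = 32K of CCS,
	 * i.e. 32K / 256 = 128 blocks, well within the 1024-block limit of a
	 * single instruction.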
56148760ffeSRamalingam C 	 */
56248760ffeSRamalingam C 	*cs++ = XY_CTRL_SURF_COPY_BLT |
56348760ffeSRamalingam C 		src_access << SRC_ACCESS_TYPE_SHIFT |
56448760ffeSRamalingam C 		dst_access << DST_ACCESS_TYPE_SHIFT |
56548760ffeSRamalingam C 		((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
56648760ffeSRamalingam C 	*cs++ = src_offset;
56748760ffeSRamalingam C 	*cs++ = rq->engine->instance |
56848760ffeSRamalingam C 		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
56948760ffeSRamalingam C 	*cs++ = dst_offset;
57048760ffeSRamalingam C 	*cs++ = rq->engine->instance |
57148760ffeSRamalingam C 		FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
57248760ffeSRamalingam C 
57348760ffeSRamalingam C 	cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
57448760ffeSRamalingam C 	*cs++ = MI_NOOP;
57548760ffeSRamalingam C 
57648760ffeSRamalingam C 	intel_ring_advance(rq, cs);
57748760ffeSRamalingam C 
57848760ffeSRamalingam C 	return 0;
57948760ffeSRamalingam C }
58048760ffeSRamalingam C 
58100e27ad8SMatthew Auld static int emit_copy(struct i915_request *rq,
58200e27ad8SMatthew Auld 		     u32 dst_offset, u32 src_offset, int size)
583cf586021SChris Wilson {
584*d3f23ab9SAndrzej Hajda 	const int ver = GRAPHICS_VER(rq->i915);
585cf586021SChris Wilson 	u32 instance = rq->engine->instance;
586cf586021SChris Wilson 	u32 *cs;
587cf586021SChris Wilson 
5887c517f83SLucas De Marchi 	cs = intel_ring_begin(rq, ver >= 8 ? 10 : 6);
589cf586021SChris Wilson 	if (IS_ERR(cs))
590cf586021SChris Wilson 		return PTR_ERR(cs);
591cf586021SChris Wilson 
5927c517f83SLucas De Marchi 	if (ver >= 9 && !wa_1209644611_applies(ver, size)) {
593cf586021SChris Wilson 		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
594cf586021SChris Wilson 		*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
595cf586021SChris Wilson 		*cs++ = 0;
596cf586021SChris Wilson 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
59700e27ad8SMatthew Auld 		*cs++ = dst_offset;
598cf586021SChris Wilson 		*cs++ = instance;
599cf586021SChris Wilson 		*cs++ = 0;
600cf586021SChris Wilson 		*cs++ = PAGE_SIZE;
60100e27ad8SMatthew Auld 		*cs++ = src_offset;
602cf586021SChris Wilson 		*cs++ = instance;
6037c517f83SLucas De Marchi 	} else if (ver >= 8) {
604cf586021SChris Wilson 		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
605cf586021SChris Wilson 		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
606cf586021SChris Wilson 		*cs++ = 0;
607cf586021SChris Wilson 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
60800e27ad8SMatthew Auld 		*cs++ = dst_offset;
609cf586021SChris Wilson 		*cs++ = instance;
610cf586021SChris Wilson 		*cs++ = 0;
611cf586021SChris Wilson 		*cs++ = PAGE_SIZE;
61200e27ad8SMatthew Auld 		*cs++ = src_offset;
613cf586021SChris Wilson 		*cs++ = instance;
614cf586021SChris Wilson 	} else {
615cf586021SChris Wilson 		GEM_BUG_ON(instance);
616cf586021SChris Wilson 		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
617cf586021SChris Wilson 		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
618cf586021SChris Wilson 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
61900e27ad8SMatthew Auld 		*cs++ = dst_offset;
620cf586021SChris Wilson 		*cs++ = PAGE_SIZE;
62100e27ad8SMatthew Auld 		*cs++ = src_offset;
622cf586021SChris Wilson 	}
623cf586021SChris Wilson 
624cf586021SChris Wilson 	intel_ring_advance(rq, cs);
625cf586021SChris Wilson 	return 0;
626cf586021SChris Wilson }
627cf586021SChris Wilson 
6288676145eSMatthew Auld static u64 scatter_list_length(struct scatterlist *sg)
629da0595aeSRamalingam C {
6308676145eSMatthew Auld 	u64 len = 0;
631da0595aeSRamalingam C 
632da0595aeSRamalingam C 	while (sg && sg_dma_len(sg)) {
633da0595aeSRamalingam C 		len += sg_dma_len(sg);
634da0595aeSRamalingam C 		sg = sg_next(sg);
6352be1959eSJason Wang 	}
636da0595aeSRamalingam C 
637da0595aeSRamalingam C 	return len;
638da0595aeSRamalingam C }
639da0595aeSRamalingam C 
6408676145eSMatthew Auld static int
641da0595aeSRamalingam C calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
6428676145eSMatthew Auld 		   u64 bytes_to_cpy, u64 ccs_bytes_to_cpy)
643da0595aeSRamalingam C {
6448676145eSMatthew Auld 	if (ccs_bytes_to_cpy && !src_is_lmem)
645da0595aeSRamalingam C 		/*
646da0595aeSRamalingam C 		 * When CHUNK_SZ is passed, all the pages up to CHUNK_SZ
647da0595aeSRamalingam C 		 * will be taken for the blt. On Flat-CCS supported
648da0595aeSRamalingam C 		 * platforms the smem object will have more pages than
649da0595aeSRamalingam C 		 * required for main memory, hence limit it to the
650da0595aeSRamalingam C 		 * required size for main memory.
651da0595aeSRamalingam C 		 */
6528676145eSMatthew Auld 		return min_t(u64, bytes_to_cpy, CHUNK_SZ);
6538676145eSMatthew Auld 	else
6548676145eSMatthew Auld 		return CHUNK_SZ;
655da0595aeSRamalingam C }
656da0595aeSRamalingam C 
6578676145eSMatthew Auld static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy)
658da0595aeSRamalingam C {
6598676145eSMatthew Auld 	u64 len;
660da0595aeSRamalingam C 
661da0595aeSRamalingam C 	do {
662da0595aeSRamalingam C 		GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
663da0595aeSRamalingam C 		len = it->max - it->dma;
664da0595aeSRamalingam C 		if (len > bytes_to_cpy) {
665da0595aeSRamalingam C 			it->dma += bytes_to_cpy;
666da0595aeSRamalingam C 			break;
667da0595aeSRamalingam C 		}
668da0595aeSRamalingam C 
669da0595aeSRamalingam C 		bytes_to_cpy -= len;
670da0595aeSRamalingam C 
671da0595aeSRamalingam C 		it->sg = __sg_next(it->sg);
672da0595aeSRamalingam C 		it->dma = sg_dma_address(it->sg);
673da0595aeSRamalingam C 		it->max = it->dma + sg_dma_len(it->sg);
674da0595aeSRamalingam C 	} while (bytes_to_cpy);
675da0595aeSRamalingam C }
676da0595aeSRamalingam C 
677cf586021SChris Wilson int
678cf586021SChris Wilson intel_context_migrate_copy(struct intel_context *ce,
67911930817SThomas Hellström 			   const struct i915_deps *deps,
680cf586021SChris Wilson 			   struct scatterlist *src,
6819275277dSFei Yang 			   unsigned int src_pat_index,
682cf586021SChris Wilson 			   bool src_is_lmem,
683cf586021SChris Wilson 			   struct scatterlist *dst,
6849275277dSFei Yang 			   unsigned int dst_pat_index,
685cf586021SChris Wilson 			   bool dst_is_lmem,
686cf586021SChris Wilson 			   struct i915_request **out)
687cf586021SChris Wilson {
688da0595aeSRamalingam C 	struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
689da0595aeSRamalingam C 	struct drm_i915_private *i915 = ce->engine->i915;
6908676145eSMatthew Auld 	u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
6919275277dSFei Yang 	unsigned int ccs_pat_index;
6926e6bc8c0SRamalingam C 	u32 src_offset, dst_offset;
693da0595aeSRamalingam C 	u8 src_access, dst_access;
694cf586021SChris Wilson 	struct i915_request *rq;
6958676145eSMatthew Auld 	u64 src_sz, dst_sz;
696353819d8SMatthew Auld 	bool ccs_is_src, overwrite_ccs;
697cf586021SChris Wilson 	int err;
698cf586021SChris Wilson 
69994ce0d65SChris Wilson 	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
700552caa1fSMatthew Auld 	GEM_BUG_ON(IS_DGFX(ce->engine->i915) && (!src_is_lmem && !dst_is_lmem));
701cf586021SChris Wilson 	*out = NULL;
702cf586021SChris Wilson 
703cf586021SChris Wilson 	GEM_BUG_ON(ce->ring->size < SZ_64K);
704cf586021SChris Wilson 
705da0595aeSRamalingam C 	src_sz = scatter_list_length(src);
706da0595aeSRamalingam C 	bytes_to_cpy = src_sz;
707da0595aeSRamalingam C 
708da0595aeSRamalingam C 	if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) {
709da0595aeSRamalingam C 		src_access = !src_is_lmem && dst_is_lmem;
710da0595aeSRamalingam C 		dst_access = !src_access;
711da0595aeSRamalingam C 
712da0595aeSRamalingam C 		dst_sz = scatter_list_length(dst);
713da0595aeSRamalingam C 		if (src_is_lmem) {
714da0595aeSRamalingam C 			it_ccs = it_dst;
7159275277dSFei Yang 			ccs_pat_index = dst_pat_index;
716da0595aeSRamalingam C 			ccs_is_src = false;
717da0595aeSRamalingam C 		} else if (dst_is_lmem) {
718da0595aeSRamalingam C 			bytes_to_cpy = dst_sz;
719da0595aeSRamalingam C 			it_ccs = it_src;
7209275277dSFei Yang 			ccs_pat_index = src_pat_index;
721da0595aeSRamalingam C 			ccs_is_src = true;
722da0595aeSRamalingam C 		}
723da0595aeSRamalingam C 
724da0595aeSRamalingam C 		/*
725da0595aeSRamalingam C 		 * When an eviction of the CCS data is needed, smem will have
726da0595aeSRamalingam C 		 * the extra pages for the CCS data.
727da0595aeSRamalingam C 		 *
728da0595aeSRamalingam C 		 * TO-DO: Want to move the size mismatch check to a WARN_ON,
729da0595aeSRamalingam C 		 * but we still have some smem->lmem requests with the same size.
730da0595aeSRamalingam C 		 * Need to fix it.
731da0595aeSRamalingam C 		 */
732da0595aeSRamalingam C 		ccs_bytes_to_cpy = src_sz != dst_sz ? GET_CCS_BYTES(i915, bytes_to_cpy) : 0;
733da0595aeSRamalingam C 		if (ccs_bytes_to_cpy)
734da0595aeSRamalingam C 			get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
735da0595aeSRamalingam C 	}
736da0595aeSRamalingam C 
737353819d8SMatthew Auld 	overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
738353819d8SMatthew Auld 
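	/*
	 * Position the source and destination in the fixed windows laid out
	 * by migrate_vm(): on HAS_64K_PAGES platforms the first window is
	 * reserved for smem and the second/third map lmem src/dst, otherwise
	 * src and dst simply take the first two CHUNK_SZ windows.
	 */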
7396e6bc8c0SRamalingam C 	src_offset = 0;
7406e6bc8c0SRamalingam C 	dst_offset = CHUNK_SZ;
7416e6bc8c0SRamalingam C 	if (HAS_64K_PAGES(ce->engine->i915)) {
7426e6bc8c0SRamalingam C 		src_offset = 0;
7436e6bc8c0SRamalingam C 		dst_offset = 0;
7446e6bc8c0SRamalingam C 		if (src_is_lmem)
7456e6bc8c0SRamalingam C 			src_offset = CHUNK_SZ;
7466e6bc8c0SRamalingam C 		if (dst_is_lmem)
7476e6bc8c0SRamalingam C 			dst_offset = 2 * CHUNK_SZ;
7486e6bc8c0SRamalingam C 	}
7496e6bc8c0SRamalingam C 
750cf586021SChris Wilson 	do {
751cf586021SChris Wilson 		int len;
752cf586021SChris Wilson 
753cf586021SChris Wilson 		rq = i915_request_create(ce);
754cf586021SChris Wilson 		if (IS_ERR(rq)) {
755cf586021SChris Wilson 			err = PTR_ERR(rq);
756cf586021SChris Wilson 			goto out_ce;
757cf586021SChris Wilson 		}
758cf586021SChris Wilson 
75911930817SThomas Hellström 		if (deps) {
76011930817SThomas Hellström 			err = i915_request_await_deps(rq, deps);
761cf586021SChris Wilson 			if (err)
762cf586021SChris Wilson 				goto out_rq;
763cf586021SChris Wilson 
764cf586021SChris Wilson 			if (rq->engine->emit_init_breadcrumb) {
765cf586021SChris Wilson 				err = rq->engine->emit_init_breadcrumb(rq);
766cf586021SChris Wilson 				if (err)
767cf586021SChris Wilson 					goto out_rq;
768cf586021SChris Wilson 			}
769cf586021SChris Wilson 
77011930817SThomas Hellström 			deps = NULL;
771cf586021SChris Wilson 		}
772cf586021SChris Wilson 
773cf586021SChris Wilson 		/* The PTE updates + copy must not be interrupted. */
774cf586021SChris Wilson 		err = emit_no_arbitration(rq);
775cf586021SChris Wilson 		if (err)
776cf586021SChris Wilson 			goto out_rq;
777cf586021SChris Wilson 
7788676145eSMatthew Auld 		src_sz = calculate_chunk_sz(i915, src_is_lmem,
779da0595aeSRamalingam C 					    bytes_to_cpy, ccs_bytes_to_cpy);
780da0595aeSRamalingam C 
7819275277dSFei Yang 		len = emit_pte(rq, &it_src, src_pat_index, src_is_lmem,
782da0595aeSRamalingam C 			       src_offset, src_sz);
783310bf25dSRamalingam C 		if (!len) {
784310bf25dSRamalingam C 			err = -EINVAL;
785310bf25dSRamalingam C 			goto out_rq;
786310bf25dSRamalingam C 		}
787310bf25dSRamalingam C 		if (len < 0) {
788cf586021SChris Wilson 			err = len;
789cf586021SChris Wilson 			goto out_rq;
790cf586021SChris Wilson 		}
791cf586021SChris Wilson 
7929275277dSFei Yang 		err = emit_pte(rq, &it_dst, dst_pat_index, dst_is_lmem,
79300e27ad8SMatthew Auld 			       dst_offset, len);
794cf586021SChris Wilson 		if (err < 0)
795cf586021SChris Wilson 			goto out_rq;
796cf586021SChris Wilson 		if (err < len) {
797cf586021SChris Wilson 			err = -EINVAL;
798cf586021SChris Wilson 			goto out_rq;
799cf586021SChris Wilson 		}
800cf586021SChris Wilson 
801cf586021SChris Wilson 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
802cf586021SChris Wilson 		if (err)
803cf586021SChris Wilson 			goto out_rq;
804cf586021SChris Wilson 
80500e27ad8SMatthew Auld 		err = emit_copy(rq, dst_offset,	src_offset, len);
806da0595aeSRamalingam C 		if (err)
807da0595aeSRamalingam C 			goto out_rq;
808da0595aeSRamalingam C 
809da0595aeSRamalingam C 		bytes_to_cpy -= len;
810da0595aeSRamalingam C 
811da0595aeSRamalingam C 		if (ccs_bytes_to_cpy) {
812b8c9d486SRamalingam C 			int ccs_sz;
813b8c9d486SRamalingam C 
814da0595aeSRamalingam C 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
815da0595aeSRamalingam C 			if (err)
816da0595aeSRamalingam C 				goto out_rq;
817da0595aeSRamalingam C 
818b8c9d486SRamalingam C 			ccs_sz = GET_CCS_BYTES(i915, len);
8199275277dSFei Yang 			err = emit_pte(rq, &it_ccs, ccs_pat_index, false,
820da0595aeSRamalingam C 				       ccs_is_src ? src_offset : dst_offset,
821da0595aeSRamalingam C 				       ccs_sz);
822b8c9d486SRamalingam C 			if (err < 0)
823b8c9d486SRamalingam C 				goto out_rq;
824b8c9d486SRamalingam C 			if (err < ccs_sz) {
825b8c9d486SRamalingam C 				err = -EINVAL;
826b8c9d486SRamalingam C 				goto out_rq;
827b8c9d486SRamalingam C 			}
828da0595aeSRamalingam C 
829da0595aeSRamalingam C 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
830da0595aeSRamalingam C 			if (err)
831da0595aeSRamalingam C 				goto out_rq;
832da0595aeSRamalingam C 
833da0595aeSRamalingam C 			err = emit_copy_ccs(rq, dst_offset, dst_access,
834b8c9d486SRamalingam C 					    src_offset, src_access, len);
835da0595aeSRamalingam C 			if (err)
836da0595aeSRamalingam C 				goto out_rq;
837da0595aeSRamalingam C 
838da0595aeSRamalingam C 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
839da0595aeSRamalingam C 			if (err)
840da0595aeSRamalingam C 				goto out_rq;
841da0595aeSRamalingam C 			ccs_bytes_to_cpy -= ccs_sz;
842353819d8SMatthew Auld 		} else if (overwrite_ccs) {
843353819d8SMatthew Auld 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
844353819d8SMatthew Auld 			if (err)
845353819d8SMatthew Auld 				goto out_rq;
846353819d8SMatthew Auld 
847b29d26fbSMatthew Auld 			if (src_is_lmem) {
848353819d8SMatthew Auld 				/*
849b29d26fbSMatthew Auld 				 * If the src is already in lmem, then we must
850b29d26fbSMatthew Auld 				 * be doing an lmem -> lmem transfer, and so
851b29d26fbSMatthew Auld 				 * should be safe to directly copy the CCS
852b29d26fbSMatthew Auld 				 * state. In this case we have either
853b29d26fbSMatthew Auld 				 * initialised the CCS aux state when first
854b29d26fbSMatthew Auld 				 * clearing the pages (since it is already
855b29d26fbSMatthew Auld 				 * allocated in lmem), or the user has
856b29d26fbSMatthew Auld 				 * potentially populated it, in which case we
857b29d26fbSMatthew Auld 				 * need to copy the CCS state as-is.
858353819d8SMatthew Auld 				 */
859b29d26fbSMatthew Auld 				err = emit_copy_ccs(rq,
860b29d26fbSMatthew Auld 						    dst_offset, INDIRECT_ACCESS,
861b29d26fbSMatthew Auld 						    src_offset, INDIRECT_ACCESS,
862b29d26fbSMatthew Auld 						    len);
863b29d26fbSMatthew Auld 			} else {
864b29d26fbSMatthew Auld 				/*
865b29d26fbSMatthew Auld 				 * While we can't always restore/manage the CCS
866b29d26fbSMatthew Auld 				 * state, we still need to ensure we don't leak
867b29d26fbSMatthew Auld 				 * the CCS state from the previous user, so make
868b29d26fbSMatthew Auld 				 * sure we overwrite it with something.
869b29d26fbSMatthew Auld 				 */
870b29d26fbSMatthew Auld 				err = emit_copy_ccs(rq,
871b29d26fbSMatthew Auld 						    dst_offset, INDIRECT_ACCESS,
872b29d26fbSMatthew Auld 						    dst_offset, DIRECT_ACCESS,
873b29d26fbSMatthew Auld 						    len);
874b29d26fbSMatthew Auld 			}
875b29d26fbSMatthew Auld 
876353819d8SMatthew Auld 			if (err)
877353819d8SMatthew Auld 				goto out_rq;
878353819d8SMatthew Auld 
879353819d8SMatthew Auld 			err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
880353819d8SMatthew Auld 			if (err)
881353819d8SMatthew Auld 				goto out_rq;
882da0595aeSRamalingam C 		}
883cf586021SChris Wilson 
884cf586021SChris Wilson 		/* Arbitration is re-enabled between requests. */
885cf586021SChris Wilson out_rq:
886cf586021SChris Wilson 		if (*out)
887cf586021SChris Wilson 			i915_request_put(*out);
888cf586021SChris Wilson 		*out = i915_request_get(rq);
889cf586021SChris Wilson 		i915_request_add(rq);
890da0595aeSRamalingam C 
891da0595aeSRamalingam C 		if (err)
892cf586021SChris Wilson 			break;
893cf586021SChris Wilson 
894da0595aeSRamalingam C 		if (!bytes_to_cpy && !ccs_bytes_to_cpy) {
895da0595aeSRamalingam C 			if (src_is_lmem)
896da0595aeSRamalingam C 				WARN_ON(it_src.sg && sg_dma_len(it_src.sg));
897da0595aeSRamalingam C 			else
898da0595aeSRamalingam C 				WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg));
899da0595aeSRamalingam C 			break;
900da0595aeSRamalingam C 		}
901da0595aeSRamalingam C 
902da0595aeSRamalingam C 		if (WARN_ON(!it_src.sg || !sg_dma_len(it_src.sg) ||
903da0595aeSRamalingam C 			    !it_dst.sg || !sg_dma_len(it_dst.sg) ||
904da0595aeSRamalingam C 			    (ccs_bytes_to_cpy && (!it_ccs.sg ||
905da0595aeSRamalingam C 						  !sg_dma_len(it_ccs.sg))))) {
906da0595aeSRamalingam C 			err = -EINVAL;
907da0595aeSRamalingam C 			break;
908da0595aeSRamalingam C 		}
909da0595aeSRamalingam C 
910cf586021SChris Wilson 		cond_resched();
911cf586021SChris Wilson 	} while (1);
912cf586021SChris Wilson 
913cf586021SChris Wilson out_ce:
914cf586021SChris Wilson 	return err;
915cf586021SChris Wilson }
916cf586021SChris Wilson 
917a0ed9c95SRamalingam C static int emit_clear(struct i915_request *rq, u32 offset, int size,
918a0ed9c95SRamalingam C 		      u32 value, bool is_lmem)
919563baae1SChris Wilson {
920*d3f23ab9SAndrzej Hajda 	struct drm_i915_private *i915 = rq->i915;
921a0ed9c95SRamalingam C 	int mocs = rq->engine->gt->mocs.uc_index << 1;
922a0ed9c95SRamalingam C 	const int ver = GRAPHICS_VER(i915);
923a0ed9c95SRamalingam C 	int ring_sz;
924563baae1SChris Wilson 	u32 *cs;
925563baae1SChris Wilson 
926563baae1SChris Wilson 	GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
927563baae1SChris Wilson 
92800b9dd06SPallavi Mishra 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
929a0ed9c95SRamalingam C 		ring_sz = XY_FAST_COLOR_BLT_DW;
930a0ed9c95SRamalingam C 	else if (ver >= 8)
931a0ed9c95SRamalingam C 		ring_sz = 8;
932a0ed9c95SRamalingam C 	else
933a0ed9c95SRamalingam C 		ring_sz = 6;
934a0ed9c95SRamalingam C 
935a0ed9c95SRamalingam C 	cs = intel_ring_begin(rq, ring_sz);
936563baae1SChris Wilson 	if (IS_ERR(cs))
937563baae1SChris Wilson 		return PTR_ERR(cs);
938563baae1SChris Wilson 
93900b9dd06SPallavi Mishra 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
940a0ed9c95SRamalingam C 		*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
941a0ed9c95SRamalingam C 			(XY_FAST_COLOR_BLT_DW - 2);
942a0ed9c95SRamalingam C 		*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
943a0ed9c95SRamalingam C 			(PAGE_SIZE - 1);
944a0ed9c95SRamalingam C 		*cs++ = 0;
945a0ed9c95SRamalingam C 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
946a0ed9c95SRamalingam C 		*cs++ = offset;
947a0ed9c95SRamalingam C 		*cs++ = rq->engine->instance;
948a0ed9c95SRamalingam C 		*cs++ = !is_lmem << XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT;
949a0ed9c95SRamalingam C 		/* BG7 */
950a0ed9c95SRamalingam C 		*cs++ = value;
951a0ed9c95SRamalingam C 		*cs++ = 0;
952a0ed9c95SRamalingam C 		*cs++ = 0;
953a0ed9c95SRamalingam C 		*cs++ = 0;
954a0ed9c95SRamalingam C 		/* BG11 */
955a0ed9c95SRamalingam C 		*cs++ = 0;
956a0ed9c95SRamalingam C 		*cs++ = 0;
957a0ed9c95SRamalingam C 		/* BG13 */
958a0ed9c95SRamalingam C 		*cs++ = 0;
959a0ed9c95SRamalingam C 		*cs++ = 0;
960a0ed9c95SRamalingam C 		*cs++ = 0;
961a0ed9c95SRamalingam C 	} else if (ver >= 8) {
962563baae1SChris Wilson 		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
963563baae1SChris Wilson 		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
964563baae1SChris Wilson 		*cs++ = 0;
965563baae1SChris Wilson 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
966fd5803e5SRamalingam C 		*cs++ = offset;
967fd5803e5SRamalingam C 		*cs++ = rq->engine->instance;
968563baae1SChris Wilson 		*cs++ = value;
969563baae1SChris Wilson 		*cs++ = MI_NOOP;
970563baae1SChris Wilson 	} else {
971563baae1SChris Wilson 		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
972563baae1SChris Wilson 		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
973563baae1SChris Wilson 		*cs++ = 0;
974563baae1SChris Wilson 		*cs++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
975fd5803e5SRamalingam C 		*cs++ = offset;
976563baae1SChris Wilson 		*cs++ = value;
977563baae1SChris Wilson 	}
978563baae1SChris Wilson 
979563baae1SChris Wilson 	intel_ring_advance(rq, cs);
980563baae1SChris Wilson 	return 0;
981563baae1SChris Wilson }
982563baae1SChris Wilson 
983563baae1SChris Wilson int
984563baae1SChris Wilson intel_context_migrate_clear(struct intel_context *ce,
98511930817SThomas Hellström 			    const struct i915_deps *deps,
986563baae1SChris Wilson 			    struct scatterlist *sg,
9879275277dSFei Yang 			    unsigned int pat_index,
988563baae1SChris Wilson 			    bool is_lmem,
989563baae1SChris Wilson 			    u32 value,
990563baae1SChris Wilson 			    struct i915_request **out)
991563baae1SChris Wilson {
99248760ffeSRamalingam C 	struct drm_i915_private *i915 = ce->engine->i915;
993563baae1SChris Wilson 	struct sgt_dma it = sg_sgt(sg);
994563baae1SChris Wilson 	struct i915_request *rq;
9956e6bc8c0SRamalingam C 	u32 offset;
996563baae1SChris Wilson 	int err;
997563baae1SChris Wilson 
99894ce0d65SChris Wilson 	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
999563baae1SChris Wilson 	*out = NULL;
1000563baae1SChris Wilson 
1001563baae1SChris Wilson 	GEM_BUG_ON(ce->ring->size < SZ_64K);
1002563baae1SChris Wilson 
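	/*
	 * A clear only needs one window: the first CHUNK_SZ window, or the
	 * second (lmem) window on HAS_64K_PAGES platforms, where the first is
	 * reserved for smem mappings.
	 */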
10036e6bc8c0SRamalingam C 	offset = 0;
100448760ffeSRamalingam C 	if (HAS_64K_PAGES(i915) && is_lmem)
10056e6bc8c0SRamalingam C 		offset = CHUNK_SZ;
10066e6bc8c0SRamalingam C 
1007563baae1SChris Wilson 	do {
1008563baae1SChris Wilson 		int len;
1009563baae1SChris Wilson 
1010563baae1SChris Wilson 		rq = i915_request_create(ce);
1011563baae1SChris Wilson 		if (IS_ERR(rq)) {
1012563baae1SChris Wilson 			err = PTR_ERR(rq);
1013563baae1SChris Wilson 			goto out_ce;
1014563baae1SChris Wilson 		}
1015563baae1SChris Wilson 
101611930817SThomas Hellström 		if (deps) {
101711930817SThomas Hellström 			err = i915_request_await_deps(rq, deps);
1018563baae1SChris Wilson 			if (err)
1019563baae1SChris Wilson 				goto out_rq;
1020563baae1SChris Wilson 
1021563baae1SChris Wilson 			if (rq->engine->emit_init_breadcrumb) {
1022563baae1SChris Wilson 				err = rq->engine->emit_init_breadcrumb(rq);
1023563baae1SChris Wilson 				if (err)
1024563baae1SChris Wilson 					goto out_rq;
1025563baae1SChris Wilson 			}
1026563baae1SChris Wilson 
102711930817SThomas Hellström 			deps = NULL;
1028563baae1SChris Wilson 		}
1029563baae1SChris Wilson 
1030563baae1SChris Wilson 		/* The PTE updates + clear must not be interrupted. */
1031563baae1SChris Wilson 		err = emit_no_arbitration(rq);
1032563baae1SChris Wilson 		if (err)
1033563baae1SChris Wilson 			goto out_rq;
1034563baae1SChris Wilson 
10359275277dSFei Yang 		len = emit_pte(rq, &it, pat_index, is_lmem, offset, CHUNK_SZ);
1036563baae1SChris Wilson 		if (len <= 0) {
1037563baae1SChris Wilson 			err = len;
1038563baae1SChris Wilson 			goto out_rq;
1039563baae1SChris Wilson 		}
1040563baae1SChris Wilson 
1041563baae1SChris Wilson 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
1042563baae1SChris Wilson 		if (err)
1043563baae1SChris Wilson 			goto out_rq;
1044563baae1SChris Wilson 
1045a0ed9c95SRamalingam C 		err = emit_clear(rq, offset, len, value, is_lmem);
104648760ffeSRamalingam C 		if (err)
104748760ffeSRamalingam C 			goto out_rq;
104848760ffeSRamalingam C 
104948760ffeSRamalingam C 		if (HAS_FLAT_CCS(i915) && is_lmem && !value) {
105048760ffeSRamalingam C 			/*
105148760ffeSRamalingam C 			 * Copy the content of the memory into the
105248760ffeSRamalingam C 			 * corresponding CCS surface.
105348760ffeSRamalingam C 			 */
105448760ffeSRamalingam C 			err = emit_copy_ccs(rq, offset, INDIRECT_ACCESS, offset,
105548760ffeSRamalingam C 					    DIRECT_ACCESS, len);
105648760ffeSRamalingam C 			if (err)
105748760ffeSRamalingam C 				goto out_rq;
105848760ffeSRamalingam C 		}
105948760ffeSRamalingam C 
106048760ffeSRamalingam C 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
1061563baae1SChris Wilson 
1062563baae1SChris Wilson 		/* Arbitration is re-enabled between requests. */
1063563baae1SChris Wilson out_rq:
1064563baae1SChris Wilson 		if (*out)
1065563baae1SChris Wilson 			i915_request_put(*out);
1066563baae1SChris Wilson 		*out = i915_request_get(rq);
1067563baae1SChris Wilson 		i915_request_add(rq);
1068563baae1SChris Wilson 		if (err || !it.sg || !sg_dma_len(it.sg))
1069563baae1SChris Wilson 			break;
1070563baae1SChris Wilson 
1071563baae1SChris Wilson 		cond_resched();
1072563baae1SChris Wilson 	} while (1);
1073563baae1SChris Wilson 
1074563baae1SChris Wilson out_ce:
1075563baae1SChris Wilson 	return err;
1076563baae1SChris Wilson }
1077563baae1SChris Wilson 
1078cf586021SChris Wilson int intel_migrate_copy(struct intel_migrate *m,
1079cf586021SChris Wilson 		       struct i915_gem_ww_ctx *ww,
108011930817SThomas Hellström 		       const struct i915_deps *deps,
1081cf586021SChris Wilson 		       struct scatterlist *src,
10829275277dSFei Yang 		       unsigned int src_pat_index,
1083cf586021SChris Wilson 		       bool src_is_lmem,
1084cf586021SChris Wilson 		       struct scatterlist *dst,
10859275277dSFei Yang 		       unsigned int dst_pat_index,
1086cf586021SChris Wilson 		       bool dst_is_lmem,
1087cf586021SChris Wilson 		       struct i915_request **out)
1088cf586021SChris Wilson {
1089cf586021SChris Wilson 	struct intel_context *ce;
1090cf586021SChris Wilson 	int err;
1091cf586021SChris Wilson 
1092cf586021SChris Wilson 	*out = NULL;
1093cf586021SChris Wilson 	if (!m->context)
1094cf586021SChris Wilson 		return -ENODEV;
1095cf586021SChris Wilson 
1096cf586021SChris Wilson 	ce = intel_migrate_create_context(m);
1097cf586021SChris Wilson 	if (IS_ERR(ce))
1098cf586021SChris Wilson 		ce = intel_context_get(m->context);
1099cf586021SChris Wilson 	GEM_BUG_ON(IS_ERR(ce));
1100cf586021SChris Wilson 
1101cf586021SChris Wilson 	err = intel_context_pin_ww(ce, ww);
1102cf586021SChris Wilson 	if (err)
1103cf586021SChris Wilson 		goto out;
1104cf586021SChris Wilson 
110511930817SThomas Hellström 	err = intel_context_migrate_copy(ce, deps,
11069275277dSFei Yang 					 src, src_pat_index, src_is_lmem,
11079275277dSFei Yang 					 dst, dst_pat_index, dst_is_lmem,
1108cf586021SChris Wilson 					 out);
1109cf586021SChris Wilson 
1110cf586021SChris Wilson 	intel_context_unpin(ce);
1111cf586021SChris Wilson out:
1112cf586021SChris Wilson 	intel_context_put(ce);
1113cf586021SChris Wilson 	return err;
1114cf586021SChris Wilson }
1115cf586021SChris Wilson 
1116563baae1SChris Wilson int
1117563baae1SChris Wilson intel_migrate_clear(struct intel_migrate *m,
1118563baae1SChris Wilson 		    struct i915_gem_ww_ctx *ww,
111911930817SThomas Hellström 		    const struct i915_deps *deps,
1120563baae1SChris Wilson 		    struct scatterlist *sg,
11219275277dSFei Yang 		    unsigned int pat_index,
1122563baae1SChris Wilson 		    bool is_lmem,
1123563baae1SChris Wilson 		    u32 value,
1124563baae1SChris Wilson 		    struct i915_request **out)
1125563baae1SChris Wilson {
1126563baae1SChris Wilson 	struct intel_context *ce;
1127563baae1SChris Wilson 	int err;
1128563baae1SChris Wilson 
1129563baae1SChris Wilson 	*out = NULL;
1130563baae1SChris Wilson 	if (!m->context)
1131563baae1SChris Wilson 		return -ENODEV;
1132563baae1SChris Wilson 
1133563baae1SChris Wilson 	ce = intel_migrate_create_context(m);
1134563baae1SChris Wilson 	if (IS_ERR(ce))
1135563baae1SChris Wilson 		ce = intel_context_get(m->context);
1136563baae1SChris Wilson 	GEM_BUG_ON(IS_ERR(ce));
1137563baae1SChris Wilson 
1138563baae1SChris Wilson 	err = intel_context_pin_ww(ce, ww);
1139563baae1SChris Wilson 	if (err)
1140563baae1SChris Wilson 		goto out;
1141563baae1SChris Wilson 
11429275277dSFei Yang 	err = intel_context_migrate_clear(ce, deps, sg, pat_index,
1143563baae1SChris Wilson 					  is_lmem, value, out);
1144563baae1SChris Wilson 
1145563baae1SChris Wilson 	intel_context_unpin(ce);
1146563baae1SChris Wilson out:
1147563baae1SChris Wilson 	intel_context_put(ce);
1148563baae1SChris Wilson 	return err;
1149563baae1SChris Wilson }
1150563baae1SChris Wilson 
1151cf586021SChris Wilson void intel_migrate_fini(struct intel_migrate *m)
1152cf586021SChris Wilson {
1153cf586021SChris Wilson 	struct intel_context *ce;
1154cf586021SChris Wilson 
1155cf586021SChris Wilson 	ce = fetch_and_zero(&m->context);
1156cf586021SChris Wilson 	if (!ce)
1157cf586021SChris Wilson 		return;
1158cf586021SChris Wilson 
1159cf586021SChris Wilson 	intel_engine_destroy_pinned_context(ce);
1160cf586021SChris Wilson }
1161cf586021SChris Wilson 
1162cf586021SChris Wilson #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1163cf586021SChris Wilson #include "selftest_migrate.c"
1164cf586021SChris Wilson #endif
1165