// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_managed.h>
#include <drm/intel-gtt.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "pxp/intel_pxp.h"

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
#include "intel_gt_buffer_pool.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_regs.h"
#include "intel_gt_requests.h"
#include "intel_migrate.h"
#include "intel_mocs.h"
#include "intel_pci_config.h"
#include "intel_pm.h"
#include "intel_rc6.h"
#include "intel_renderstate.h"
#include "intel_rps.h"
#include "intel_sa_media.h"
#include "intel_gt_sysfs.h"
#include "intel_uncore.h"
#include "shmem_utils.h"

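/*
 * Early, hardware-agnostic initialisation of the software state shared by
 * every GT: locks and lists, the watchdog worker, reset/request/timeline
 * tracking, TLB invalidation bookkeeping and early PM/WOPCM/uC/RPS state.
 */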
void intel_gt_common_init_early(struct intel_gt *gt)
{
	spin_lock_init(gt->irq_lock);

	INIT_LIST_HEAD(&gt->closed_vma);
	spin_lock_init(&gt->closed_lock);

	init_llist_head(&gt->watchdog.list);
	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);

	intel_gt_init_buffer_pool(gt);
	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
	intel_gt_pm_init_early(gt);

	intel_wopcm_init_early(&gt->wopcm);
	intel_uc_init_early(&gt->uc);
	intel_rps_init_early(&gt->rps);
}

/* Preliminary initialization of Tile 0 */
int intel_root_gt_init_early(struct drm_i915_private *i915)
{
	struct intel_gt *gt = to_gt(i915);

	gt->i915 = i915;
	gt->uncore = &i915->uncore;
	gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL);
	if (!gt->irq_lock)
		return -ENOMEM;

	intel_gt_common_init_early(gt);

	return 0;
}

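/*
 * Probe this GT's local memory (LMEM) and register it with the i915 memory
 * region table; -ENODEV from intel_gt_setup_lmem() simply means the platform
 * has no LMEM and is not treated as an error.
 */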
static int intel_gt_probe_lmem(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	unsigned int instance = gt->info.id;
	int id = INTEL_REGION_LMEM_0 + instance;
	struct intel_memory_region *mem;
	int err;

	mem = intel_gt_setup_lmem(gt);
	if (IS_ERR(mem)) {
		err = PTR_ERR(mem);
		if (err == -ENODEV)
			return 0;

		drm_err(&i915->drm,
			"Failed to setup region(%d) type=%d\n",
			err, INTEL_MEMORY_LOCAL);
		return err;
	}

	mem->id = id;
	mem->instance = instance;

	intel_memory_region_set_name(mem, "local%u", mem->instance);

	GEM_BUG_ON(!HAS_REGION(i915, id));
	GEM_BUG_ON(i915->mm.regions[id]);
	i915->mm.regions[id] = mem;

	return 0;
}

int intel_gt_assign_ggtt(struct intel_gt *gt)
{
	/* Media GT shares primary GT's GGTT */
	if (gt->type == GT_MEDIA) {
		gt->ggtt = to_gt(gt->i915)->ggtt;
	} else {
		gt->ggtt = i915_ggtt_create(gt->i915);
		if (IS_ERR(gt->ggtt))
			return PTR_ERR(gt->ggtt);
	}

	list_add_tail(&gt->ggtt_link, &gt->ggtt->gt_list);

	return 0;
}

int intel_gt_init_mmio(struct intel_gt *gt)
{
	intel_gt_init_clock_frequency(gt);

	intel_uc_init_mmio(&gt->uc);
	intel_sseu_info_init(gt);
	intel_gt_mcr_init(gt);

	return intel_engines_init_mmio(gt);
}

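/*
 * Zero the control, head, tail and start registers of a ring that this
 * platform never uses, so it cannot appear busy.
 */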
static void init_unused_ring(struct intel_gt *gt, u32 base)
{
	struct intel_uncore *uncore = gt->uncore;

	intel_uncore_write(uncore, RING_CTL(base), 0);
	intel_uncore_write(uncore, RING_HEAD(base), 0);
	intel_uncore_write(uncore, RING_TAIL(base), 0);
	intel_uncore_write(uncore, RING_START(base), 0);
}

static void init_unused_rings(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_I830(i915)) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
		init_unused_ring(gt, SRB2_BASE);
		init_unused_ring(gt, SRB3_BASE);
	} else if (GRAPHICS_VER(i915) == 2) {
		init_unused_ring(gt, SRB0_BASE);
		init_unused_ring(gt, SRB1_BASE);
	} else if (GRAPHICS_VER(i915) == 3) {
		init_unused_ring(gt, PRB1_BASE);
		init_unused_ring(gt, PRB2_BASE);
	}
}

int intel_gt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	gt->last_init_time = ktime_get();

	/* Double layer security blanket, see i915_gem_init() */
	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));

	if (IS_HASWELL(i915))
		intel_uncore_write(uncore,
				   HSW_MI_PREDICATE_RESULT_2,
				   IS_HSW_GT3(i915) ?
				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);

	/* Apply the GT workarounds... */
	intel_gt_apply_workarounds(gt);
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(gt, "init");

	intel_gt_init_swizzling(gt);

	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (i.e. head != tail) after resume, which
	 * will prevent C3 entry. Make sure all unused rings
	 * are totally idle.
	 */
	init_unused_rings(gt);

	ret = i915_ppgtt_init_hw(gt);
	if (ret) {
		drm_err(&i915->drm, "Enabling PPGTT failed (%d)\n", ret);
		goto out;
	}

	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(&gt->uc);
	if (ret) {
		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
		goto out;
	}

	intel_mocs_init(gt);

out:
	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
	return ret;
}

static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw(uncore, reg, 0, set);
}

static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw(uncore, reg, clr, 0);
}

static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
{
	intel_uncore_rmw(uncore, reg, 0, 0);
}

static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
{
	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
	GEN6_RING_FAULT_REG_POSTING_READ(engine);
}

i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
{
	/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
	if (GRAPHICS_VER(gt->i915) < 11)
		return INVALID_MMIO_REG;

	return gt->type == GT_MEDIA ?
		MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
}

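/*
 * Clear any stale page-table (PGTBL_ER), EIR and ring fault status left
 * over from a previous error so it is not reported again.
 */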
void
intel_gt_clear_error_registers(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	u32 eir;

	if (GRAPHICS_VER(i915) != 2)
		clear_register(uncore, PGTBL_ER);

	if (GRAPHICS_VER(i915) < 4)
		clear_register(uncore, IPEIR(RENDER_RING_BASE));
	else
		clear_register(uncore, IPEIR_I965);

	clear_register(uncore, EIR);
	eir = intel_uncore_read(uncore, EIR);
	if (eir) {
		/*
		 * Some errors might have become stuck;
		 * mask them.
		 */
		drm_dbg(&gt->i915->drm, "EIR stuck: 0x%08x, masking\n", eir);
		rmw_set(uncore, EMR, eir);
		intel_uncore_write(uncore, GEN2_IIR,
				   I915_MASTER_ERROR_INTERRUPT);
	}

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
					   RING_FAULT_VALID, 0);
		intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 12) {
		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 8) {
		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
	} else if (GRAPHICS_VER(i915) >= 6) {
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

		for_each_engine_masked(engine, gt, engine_mask, id)
			gen6_clear_engine_error_register(engine);
	}
}

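/* Log any valid fault latched in each engine's Gen6 per-engine fault register. */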
static void gen6_check_faults(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 fault;

	for_each_engine(engine, gt, id) {
		fault = GEN6_RING_FAULT_REG_READ(engine);
		if (fault & RING_FAULT_VALID) {
			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
				"\tAddr: 0x%08lx\n"
				"\tAddress space: %s\n"
				"\tSource ID: %d\n"
				"\tType: %d\n",
				fault & PAGE_MASK,
				fault & RING_FAULT_GTTSEL_MASK ?
				"GGTT" : "PPGTT",
				RING_FAULT_SRCID(fault),
				RING_FAULT_FAULT_TYPE(fault));
		}
	}
}

static void xehp_check_faults(struct intel_gt *gt)
{
	u32 fault;

	/*
	 * Although the fault register now lives in an MCR register range,
	 * the GAM registers are special and we only truly need to read
	 * the "primary" GAM instance rather than handling each instance
	 * individually.  intel_gt_mcr_read_any() will automatically steer
	 * toward the primary instance.
	 */
	fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
		fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		drm_dbg(&gt->i915->drm, "Unexpected fault\n"
			"\tAddr: 0x%08x_%08x\n"
			"\tAddress space: %s\n"
			"\tEngine ID: %d\n"
			"\tSource ID: %d\n"
			"\tType: %d\n",
			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
			GEN8_RING_FAULT_ENGINE_ID(fault),
			RING_FAULT_SRCID(fault),
			RING_FAULT_FAULT_TYPE(fault));
	}
}

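/*
 * Gen8 through Gen12 use a single global fault register; decode and log any
 * valid fault recorded there.
 */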
static void gen8_check_faults(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
	u32 fault;

	if (GRAPHICS_VER(gt->i915) >= 12) {
		fault_reg = GEN12_RING_FAULT_REG;
		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
	} else {
		fault_reg = GEN8_RING_FAULT_REG;
		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
	}

	fault = intel_uncore_read(uncore, fault_reg);
	if (fault & RING_FAULT_VALID) {
		u32 fault_data0, fault_data1;
		u64 fault_addr;

		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);

		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
			     ((u64)fault_data0 << 12);

		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
			"\tAddr: 0x%08x_%08x\n"
			"\tAddress space: %s\n"
			"\tEngine ID: %d\n"
			"\tSource ID: %d\n"
			"\tType: %d\n",
			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
			GEN8_RING_FAULT_ENGINE_ID(fault),
			RING_FAULT_SRCID(fault),
			RING_FAULT_FAULT_TYPE(fault));
	}
}

void intel_gt_check_and_clear_faults(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 8)
		gen8_check_faults(gt);
	else if (GRAPHICS_VER(i915) >= 6)
		gen6_check_faults(gt);
	else
		return;

	intel_gt_clear_error_registers(gt, ALL_ENGINES);
}

void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	intel_wakeref_t wakeref;

	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
	 */

	wmb();

	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
		return;

	intel_gt_chipset_flush(gt);

	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
		unsigned long flags;

		spin_lock_irqsave(&uncore->lock, flags);
		intel_uncore_posting_read_fw(uncore,
					     RING_HEAD(RENDER_RING_BASE));
		spin_unlock_irqrestore(&uncore->lock, flags);
	}
}

void intel_gt_chipset_flush(struct intel_gt *gt)
{
	wmb();
	if (GRAPHICS_VER(gt->i915) < 6)
		intel_ggtt_gmch_flush();
}

void intel_gt_driver_register(struct intel_gt *gt)
{
	intel_gsc_init(&gt->gsc, gt->i915);

	intel_rps_driver_register(&gt->rps);

	intel_gt_debugfs_register(gt);
	intel_gt_sysfs_register(gt);
}

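/*
 * Allocate and pin a scratch page in the GGTT, trying LMEM first, then
 * stolen memory, then internal memory as fallbacks.
 */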
static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
	struct drm_i915_private *i915 = gt->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_lmem(i915, size,
					  I915_BO_ALLOC_VOLATILE |
					  I915_BO_ALLOC_GPU_ONLY);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_stolen(i915, size);
	if (IS_ERR(obj))
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		drm_err(&i915->drm, "Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
	if (ret)
		goto err_unref;

	gt->scratch = i915_vma_make_unshrinkable(vma);

	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void intel_gt_fini_scratch(struct intel_gt *gt)
{
	i915_vma_unpin_and_release(&gt->scratch, 0);
}

static struct i915_address_space *kernel_vm(struct intel_gt *gt)
{
	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
	else
		return i915_vm_get(&gt->ggtt->vm);
}

static int __engines_record_defaults(struct intel_gt *gt)
{
	struct i915_request *requests[I915_NUM_ENGINES] = {};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the GPU during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_renderstate so;
		struct intel_context *ce;
		struct i915_request *rq;

		/* We must be able to switch to something! */
		GEM_BUG_ON(!engine->kernel_context);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_renderstate_init(&so, ce);
		if (err)
			goto err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_fini;
		}

		err = intel_engine_emit_ctx_wa(rq);
		if (err)
			goto err_rq;

		err = intel_renderstate_emit(&so, rq);
		if (err)
			goto err_rq;

err_rq:
		requests[id] = i915_request_get(rq);
		i915_request_add(rq);
err_fini:
		intel_renderstate_fini(&so, ce);
err:
		if (err) {
			intel_context_put(ce);
			goto out;
		}
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
		err = -EIO;
		goto out;
	}

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct i915_request *rq;
		struct file *state;

		rq = requests[id];
		if (!rq)
			continue;

		if (rq->fence.error) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
		if (!rq->context->state)
			continue;

		/* Keep a copy of the state's backing pages; free the obj */
		state = shmem_create_from_object(rq->context->state->obj);
		if (IS_ERR(state)) {
			err = PTR_ERR(state);
			goto out;
		}
		rq->engine->default_state = state;
	}

out:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	if (err)
		intel_gt_set_wedged(gt);

	for (id = 0; id < ARRAY_SIZE(requests); id++) {
		struct intel_context *ce;
		struct i915_request *rq;

		rq = requests[id];
		if (!rq)
			continue;

		ce = rq->context;
		i915_request_put(rq);
		intel_context_put(ce);
	}
	return err;
}

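/*
 * With CONFIG_DRM_I915_DEBUG_GEM enabled, re-check each engine's workaround
 * list after load and flag -EIO if any workaround failed to stick.
 */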
static int __engines_verify_workarounds(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, gt, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	/* Flush and restore the kernel context for safety */
	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
		err = -EIO;

	return err;
}

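/* Wedge on fini and quiesce the GT so that no engine is left awake. */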
static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_fini(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);

	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
}

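/*
 * Retire requests until the GT is idle or the timeout expires, then give the
 * uC whatever time remains to go idle as well.
 */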
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
{
	long remaining_timeout;

	/* If the device is asleep, we have no requests outstanding */
	if (!intel_gt_pm_is_awake(gt))
		return 0;

	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
							   &remaining_timeout)) > 0) {
		cond_resched();
		if (signal_pending(current))
			return -EINTR;
	}

	if (timeout)
		return timeout;

	if (remaining_timeout < 0)
		remaining_timeout = 0;

	return intel_uc_wait_for_idle(&gt->uc, remaining_timeout);
}

int intel_gt_init(struct intel_gt *gt)
{
	int err;

	err = i915_inject_probe_error(gt->i915, -ENODEV);
	if (err)
		return err;

	intel_gt_init_workarounds(gt);

	/*
	 * This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	err = intel_gt_init_scratch(gt,
				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
	if (err)
		goto out_fw;

	intel_gt_pm_init(gt);

	gt->vm = kernel_vm(gt);
	if (!gt->vm) {
		err = -ENOMEM;
		goto err_pm;
	}

	intel_set_mocs_index(gt);

	err = intel_engines_init(gt);
	if (err)
		goto err_engines;

	err = intel_uc_init(&gt->uc);
	if (err)
		goto err_engines;

	err = intel_gt_resume(gt);
	if (err)
		goto err_uc_init;

	err = intel_gt_init_hwconfig(gt);
	if (err)
		drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
			ERR_PTR(err));

	err = __engines_record_defaults(gt);
	if (err)
		goto err_gt;

	err = __engines_verify_workarounds(gt);
	if (err)
		goto err_gt;

	intel_uc_init_late(&gt->uc);

	err = i915_inject_probe_error(gt->i915, -EIO);
	if (err)
		goto err_gt;

	intel_migrate_init(&gt->migrate, gt);

	intel_pxp_init(&gt->pxp);

	goto out_fw;
err_gt:
	__intel_gt_disable(gt);
	intel_uc_fini_hw(&gt->uc);
err_uc_init:
	intel_uc_fini(&gt->uc);
err_engines:
	intel_engines_release(gt);
	i915_vm_put(fetch_and_zero(&gt->vm));
err_pm:
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
out_fw:
	if (err)
		intel_gt_set_wedged_on_init(gt);
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	return err;
}

void intel_gt_driver_remove(struct intel_gt *gt)
{
	__intel_gt_disable(gt);

	intel_migrate_fini(&gt->migrate);
	intel_uc_driver_remove(&gt->uc);

	intel_engines_release(gt);

	intel_gt_flush_buffer_pool(gt);
}

void intel_gt_driver_unregister(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	intel_gt_sysfs_unregister(gt);
	intel_rps_driver_unregister(&gt->rps);
	intel_gsc_fini(&gt->gsc);

	intel_pxp_fini(&gt->pxp);

	/*
	 * Upon unregistering the device to prevent any new users, cancel
	 * all in-flight requests so that we can quickly unbind the active
	 * resources.
	 */
	intel_gt_set_wedged_on_fini(gt);

	/* Scrub all HW state upon release */
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_reset(gt, ALL_ENGINES);
}

void intel_gt_driver_release(struct intel_gt *gt)
{
	struct i915_address_space *vm;

	vm = fetch_and_zero(&gt->vm);
	if (vm) /* FIXME being called twice on error paths :( */
		i915_vm_put(vm);

	intel_wa_list_free(&gt->wa_list);
	intel_gt_pm_fini(gt);
	intel_gt_fini_scratch(gt);
	intel_gt_fini_buffer_pool(gt);
	intel_gt_fini_hwconfig(gt);
}

void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;

	/* We need to wait for inflight RCU frees to release their grip */
	rcu_barrier();

	for_each_gt(gt, i915, id) {
		intel_uc_driver_late_release(&gt->uc);
		intel_gt_fini_requests(gt);
		intel_gt_fini_reset(gt);
		intel_gt_fini_timelines(gt);
		mutex_destroy(&gt->tlb.invalidate_lock);
		intel_engines_free(gt);
	}
}

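/*
 * Per-tile setup: non-root tiles get their own uncore and irq_lock before
 * their MMIO range is mapped at the tile's physical offset.
 */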
static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
{
	int ret;

	if (!gt_is_root(gt)) {
		struct intel_uncore *uncore;
		spinlock_t *irq_lock;

		uncore = drmm_kzalloc(&gt->i915->drm, sizeof(*uncore), GFP_KERNEL);
		if (!uncore)
			return -ENOMEM;

		irq_lock = drmm_kzalloc(&gt->i915->drm, sizeof(*irq_lock), GFP_KERNEL);
		if (!irq_lock)
			return -ENOMEM;

		gt->uncore = uncore;
		gt->irq_lock = irq_lock;

		intel_gt_common_init_early(gt);
	}

	intel_uncore_init_early(gt->uncore, gt);

	ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
	if (ret)
		return ret;

	gt->phys_addr = phys_addr;

	return 0;
}

int intel_gt_probe_all(struct drm_i915_private *i915)
{
	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
	struct intel_gt *gt = &i915->gt0;
	const struct intel_gt_definition *gtdef;
	phys_addr_t phys_addr;
	unsigned int mmio_bar;
	unsigned int i;
	int ret;

	mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
	phys_addr = pci_resource_start(pdev, mmio_bar);

	/*
	 * We always have at least one primary GT on any device,
	 * and it has already been initialized early during probe
	 * in i915_driver_probe().
	 */
	gt->i915 = i915;
	gt->name = "Primary GT";
	gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;

	drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
	ret = intel_gt_tile_setup(gt, phys_addr);
	if (ret)
		return ret;

	i915->gt[0] = gt;

	if (!HAS_EXTRA_GT_LIST(i915))
		return 0;

	for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
	     gtdef->name != NULL;
	     i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
		gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
		if (!gt) {
			ret = -ENOMEM;
			goto err;
		}

		gt->i915 = i915;
		gt->name = gtdef->name;
		gt->type = gtdef->type;
		gt->info.engine_mask = gtdef->engine_mask;
		gt->info.id = i;

		drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
		if (GEM_WARN_ON(range_overflows_t(resource_size_t,
						  gtdef->mapping_base,
						  SZ_16M,
						  pci_resource_len(pdev, mmio_bar)))) {
			ret = -ENODEV;
			goto err;
		}

		switch (gtdef->type) {
		case GT_TILE:
			ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base);
			break;

		case GT_MEDIA:
			ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base,
						     gtdef->gsi_offset);
			break;

		case GT_PRIMARY:
			/* Primary GT should not appear in extra GT list */
		default:
			MISSING_CASE(gtdef->type);
			ret = -ENODEV;
		}

		if (ret)
			goto err;

		i915->gt[i] = gt;
	}

	return 0;

err:
	i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
	intel_gt_release_all(i915);

	return ret;
}

int intel_gt_tiles_init(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;
	int ret;

	for_each_gt(gt, i915, id) {
		ret = intel_gt_probe_lmem(gt);
		if (ret)
			return ret;
	}

	return 0;
}

void intel_gt_release_all(struct drm_i915_private *i915)
{
	struct intel_gt *gt;
	unsigned int id;

	for_each_gt(gt, i915, id)
		i915->gt[id] = NULL;
}

void intel_gt_info_print(const struct intel_gt_info *info,
			 struct drm_printer *p)
{
	drm_printf(p, "available engines: %x\n", info->engine_mask);

	intel_sseu_dump(&info->sseu, p);
}

struct reg_and_bit {
	union {
		i915_reg_t reg;
		i915_mcr_reg_t mcr_reg;
	};
	u32 bit;
};

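/*
 * Look up the TLB invalidation register for an engine's class together with
 * the bit that selects this engine instance; on gen8 the video decode class
 * uses a separate register per instance (GEN8_M2TCR) instead of a bit.
 */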
static struct reg_and_bit
get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
		const i915_reg_t *regs, const unsigned int num)
{
	const unsigned int class = engine->class;
	struct reg_and_bit rb = { };

	if (drm_WARN_ON_ONCE(&engine->i915->drm,
			     class >= num || !regs[class].reg))
		return rb;

	rb.reg = regs[class];
	if (gen8 && class == VIDEO_DECODE_CLASS)
		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
	else
		rb.bit = engine->instance;

	rb.bit = BIT(rb.bit);

	return rb;
}

/*
 * The HW architecture suggests a typical invalidation time of 40us, with
 * pessimistic cases up to 100us and a recommendation to cap at 1ms. We go a
 * bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
{
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

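/*
 * Kick a full TLB invalidation by writing the per-class invalidation
 * registers for every awake engine, then poll each register until its
 * engine bit clears to confirm completion.
 */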
static void mmio_invalidate_full(struct intel_gt *gt)
{
	static const i915_reg_t gen8_regs[] = {
		[RENDER_CLASS]			= GEN8_RTCR,
		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
	};
	static const i915_reg_t gen12_regs[] = {
		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
	};
	static const i915_mcr_reg_t xehp_regs[] = {
		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
	};
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	const i915_reg_t *regs;
	unsigned int num = 0;
	unsigned long flags;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		regs = NULL;
		num = ARRAY_SIZE(xehp_regs);
	} else if (GRAPHICS_VER(i915) == 12) {
		regs = gen12_regs;
		num = ARRAY_SIZE(gen12_regs);
	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
		regs = gen8_regs;
		num = ARRAY_SIZE(gen8_regs);
	} else if (GRAPHICS_VER(i915) < 8) {
		return;
	}

	if (drm_WARN_ONCE(&i915->drm, !num,
			  "Platform does not implement TLB invalidation!"))
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		struct reg_and_bit rb;

		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
			intel_gt_mcr_multicast_write_fw(gt,
							xehp_regs[engine->class],
							BIT(engine->instance));
		} else {
			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
			if (!i915_mmio_reg_offset(rb.reg))
				continue;

			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
		}
		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		struct reg_and_bit rb;

		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
			rb.mcr_reg = xehp_regs[engine->class];
			rb.bit = BIT(engine->instance);
		} else {
			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		}

		if (wait_for_invalidate(gt, rb))
			drm_err_ratelimited(&gt->i915->drm,
					    "%s TLB invalidation did not complete in %ums!\n",
					    engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

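/*
 * Perform a full GT TLB invalidation unless one has already completed for
 * this seqno; skipped entirely when the GT is wedged or asleep.
 */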
void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}
1207