xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_gt.c (revision 6427ab57)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 #include <drm/intel-gtt.h>
8 
9 #include "gem/i915_gem_internal.h"
10 #include "gem/i915_gem_lmem.h"
11 #include "pxp/intel_pxp.h"
12 
13 #include "i915_drv.h"
14 #include "i915_perf_oa_regs.h"
15 #include "intel_context.h"
16 #include "intel_engine_pm.h"
17 #include "intel_engine_regs.h"
18 #include "intel_ggtt_gmch.h"
19 #include "intel_gt.h"
20 #include "intel_gt_buffer_pool.h"
21 #include "intel_gt_clock_utils.h"
22 #include "intel_gt_debugfs.h"
23 #include "intel_gt_mcr.h"
24 #include "intel_gt_pm.h"
25 #include "intel_gt_regs.h"
26 #include "intel_gt_requests.h"
27 #include "intel_migrate.h"
28 #include "intel_mocs.h"
29 #include "intel_pci_config.h"
30 #include "intel_pm.h"
31 #include "intel_rc6.h"
32 #include "intel_renderstate.h"
33 #include "intel_rps.h"
34 #include "intel_sa_media.h"
35 #include "intel_gt_sysfs.h"
36 #include "intel_uncore.h"
37 #include "shmem_utils.h"
38 
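/*
 * Software-only early initialisation shared by the root GT and any extra
 * GTs (remote tiles, standalone media): set up locks, lists and workers,
 * and run the early init of sub-components that do not touch MMIO yet.
 */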
39 void intel_gt_common_init_early(struct intel_gt *gt)
40 {
41 	spin_lock_init(gt->irq_lock);
42 
43 	INIT_LIST_HEAD(&gt->closed_vma);
44 	spin_lock_init(&gt->closed_lock);
45 
46 	init_llist_head(&gt->watchdog.list);
47 	INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
48 
49 	intel_gt_init_buffer_pool(gt);
50 	intel_gt_init_reset(gt);
51 	intel_gt_init_requests(gt);
52 	intel_gt_init_timelines(gt);
53 	mutex_init(&gt->tlb.invalidate_lock);
54 	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
55 	intel_gt_pm_init_early(gt);
56 
57 	intel_uc_init_early(&gt->uc);
58 	intel_rps_init_early(&gt->rps);
59 }
60 
61 /* Preliminary initialization of Tile 0 */
62 int intel_root_gt_init_early(struct drm_i915_private *i915)
63 {
64 	struct intel_gt *gt = to_gt(i915);
65 
66 	gt->i915 = i915;
67 	gt->uncore = &i915->uncore;
68 	gt->irq_lock = drmm_kzalloc(&i915->drm, sizeof(*gt->irq_lock), GFP_KERNEL);
69 	if (!gt->irq_lock)
70 		return -ENOMEM;
71 
72 	intel_gt_common_init_early(gt);
73 
74 	return 0;
75 }
76 
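/*
 * Probe the device-local memory (LMEM) attached to this GT and publish it
 * as region INTEL_REGION_LMEM_0 + gt->info.id in i915->mm.regions[].
 * -ENODEV from the setup simply means the platform has no LMEM and is not
 * treated as an error.
 */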
77 static int intel_gt_probe_lmem(struct intel_gt *gt)
78 {
79 	struct drm_i915_private *i915 = gt->i915;
80 	unsigned int instance = gt->info.id;
81 	int id = INTEL_REGION_LMEM_0 + instance;
82 	struct intel_memory_region *mem;
83 	int err;
84 
85 	mem = intel_gt_setup_lmem(gt);
86 	if (IS_ERR(mem)) {
87 		err = PTR_ERR(mem);
88 		if (err == -ENODEV)
89 			return 0;
90 
91 		drm_err(&i915->drm,
92 			"Failed to setup region(%d) type=%d\n",
93 			err, INTEL_MEMORY_LOCAL);
94 		return err;
95 	}
96 
97 	mem->id = id;
98 	mem->instance = instance;
99 
100 	intel_memory_region_set_name(mem, "local%u", mem->instance);
101 
102 	GEM_BUG_ON(!HAS_REGION(i915, id));
103 	GEM_BUG_ON(i915->mm.regions[id]);
104 	i915->mm.regions[id] = mem;
105 
106 	return 0;
107 }
108 
109 int intel_gt_assign_ggtt(struct intel_gt *gt)
110 {
111 	gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
112 
113 	return gt->ggtt ? 0 : -ENOMEM;
114 }
115 
116 int intel_gt_init_mmio(struct intel_gt *gt)
117 {
118 	intel_gt_init_clock_frequency(gt);
119 
120 	intel_uc_init_mmio(&gt->uc);
121 	intel_sseu_info_init(gt);
122 	intel_gt_mcr_init(gt);
123 
124 	return intel_engines_init_mmio(gt);
125 }
126 
127 static void init_unused_ring(struct intel_gt *gt, u32 base)
128 {
129 	struct intel_uncore *uncore = gt->uncore;
130 
131 	intel_uncore_write(uncore, RING_CTL(base), 0);
132 	intel_uncore_write(uncore, RING_HEAD(base), 0);
133 	intel_uncore_write(uncore, RING_TAIL(base), 0);
134 	intel_uncore_write(uncore, RING_START(base), 0);
135 }
136 
137 static void init_unused_rings(struct intel_gt *gt)
138 {
139 	struct drm_i915_private *i915 = gt->i915;
140 
141 	if (IS_I830(i915)) {
142 		init_unused_ring(gt, PRB1_BASE);
143 		init_unused_ring(gt, SRB0_BASE);
144 		init_unused_ring(gt, SRB1_BASE);
145 		init_unused_ring(gt, SRB2_BASE);
146 		init_unused_ring(gt, SRB3_BASE);
147 	} else if (GRAPHICS_VER(i915) == 2) {
148 		init_unused_ring(gt, SRB0_BASE);
149 		init_unused_ring(gt, SRB1_BASE);
150 	} else if (GRAPHICS_VER(i915) == 3) {
151 		init_unused_ring(gt, PRB1_BASE);
152 		init_unused_ring(gt, PRB2_BASE);
153 	}
154 }
155 
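/*
 * (Re)initialise the GT hardware: apply and verify workarounds, set up
 * swizzling, quiesce any unused legacy rings, enable PPGTT, bring up the
 * microcontrollers (GuC/HuC) and program the MOCS tables. All of this runs
 * under a full forcewake reference.
 */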
156 int intel_gt_init_hw(struct intel_gt *gt)
157 {
158 	struct drm_i915_private *i915 = gt->i915;
159 	struct intel_uncore *uncore = gt->uncore;
160 	int ret;
161 
162 	gt->last_init_time = ktime_get();
163 
164 	/* Double layer security blanket, see i915_gem_init() */
165 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
166 
167 	if (HAS_EDRAM(i915) && GRAPHICS_VER(i915) < 9)
168 		intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
169 
170 	if (IS_HASWELL(i915))
171 		intel_uncore_write(uncore,
172 				   HSW_MI_PREDICATE_RESULT_2,
173 				   IS_HSW_GT3(i915) ?
174 				   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
175 
176 	/* Apply the GT workarounds... */
177 	intel_gt_apply_workarounds(gt);
178 	/* ...and determine whether they are sticking. */
179 	intel_gt_verify_workarounds(gt, "init");
180 
181 	intel_gt_init_swizzling(gt);
182 
183 	/*
184 	 * At least 830 can leave some of the unused rings
185 	 * "active" (i.e. head != tail) after resume, which
186 	 * will prevent C3 entry. Make sure all unused rings
187 	 * are totally idle.
188 	 */
189 	init_unused_rings(gt);
190 
191 	ret = i915_ppgtt_init_hw(gt);
192 	if (ret) {
193 		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
194 		goto out;
195 	}
196 
197 	/* We can't enable contexts until all firmware is loaded */
198 	ret = intel_uc_init_hw(&gt->uc);
199 	if (ret) {
200 		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
201 		goto out;
202 	}
203 
204 	intel_mocs_init(gt);
205 
206 out:
207 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
208 	return ret;
209 }
210 
211 static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
212 {
213 	intel_uncore_rmw(uncore, reg, 0, set);
214 }
215 
216 static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
217 {
218 	intel_uncore_rmw(uncore, reg, clr, 0);
219 }
220 
221 static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
222 {
223 	intel_uncore_rmw(uncore, reg, 0, 0);
224 }
225 
226 static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
227 {
228 	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
229 	GEN6_RING_FAULT_REG_POSTING_READ(engine);
230 }
231 
232 i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
233 {
234 	/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
235 	if (GRAPHICS_VER(gt->i915) < 11)
236 		return INVALID_MMIO_REG;
237 
238 	return gt->type == GT_MEDIA ?
239 		MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
240 }
241 
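/*
 * Clear stale error state (PGTBL_ER, IPEIR, EIR) and the RING_FAULT_VALID
 * bits; @engine_mask limits which per-engine fault registers are touched on
 * platforms that still have them (gen6/gen7).
 */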
242 void
243 intel_gt_clear_error_registers(struct intel_gt *gt,
244 			       intel_engine_mask_t engine_mask)
245 {
246 	struct drm_i915_private *i915 = gt->i915;
247 	struct intel_uncore *uncore = gt->uncore;
248 	u32 eir;
249 
250 	if (GRAPHICS_VER(i915) != 2)
251 		clear_register(uncore, PGTBL_ER);
252 
253 	if (GRAPHICS_VER(i915) < 4)
254 		clear_register(uncore, IPEIR(RENDER_RING_BASE));
255 	else
256 		clear_register(uncore, IPEIR_I965);
257 
258 	clear_register(uncore, EIR);
259 	eir = intel_uncore_read(uncore, EIR);
260 	if (eir) {
261 		/*
262 		 * Some errors might have become stuck;
263 		 * mask them.
264 		 */
265 		DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
266 		rmw_set(uncore, EMR, eir);
267 		intel_uncore_write(uncore, GEN2_IIR,
268 				   I915_MASTER_ERROR_INTERRUPT);
269 	}
270 
271 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
272 		intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
273 					   RING_FAULT_VALID, 0);
274 		intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
275 	} else if (GRAPHICS_VER(i915) >= 12) {
276 		rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
277 		intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
278 	} else if (GRAPHICS_VER(i915) >= 8) {
279 		rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
280 		intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
281 	} else if (GRAPHICS_VER(i915) >= 6) {
282 		struct intel_engine_cs *engine;
283 		enum intel_engine_id id;
284 
285 		for_each_engine_masked(engine, gt, engine_mask, id)
286 			gen6_clear_engine_error_register(engine);
287 	}
288 }
289 
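/*
 * The *_check_faults() helpers below only report faults still latched in
 * the fault registers; the actual clearing is done afterwards by
 * intel_gt_clear_error_registers() (see intel_gt_check_and_clear_faults()).
 */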
290 static void gen6_check_faults(struct intel_gt *gt)
291 {
292 	struct intel_engine_cs *engine;
293 	enum intel_engine_id id;
294 	u32 fault;
295 
296 	for_each_engine(engine, gt, id) {
297 		fault = GEN6_RING_FAULT_REG_READ(engine);
298 		if (fault & RING_FAULT_VALID) {
299 			drm_dbg(&engine->i915->drm, "Unexpected fault\n"
300 				"\tAddr: 0x%08lx\n"
301 				"\tAddress space: %s\n"
302 				"\tSource ID: %d\n"
303 				"\tType: %d\n",
304 				fault & PAGE_MASK,
305 				fault & RING_FAULT_GTTSEL_MASK ?
306 				"GGTT" : "PPGTT",
307 				RING_FAULT_SRCID(fault),
308 				RING_FAULT_FAULT_TYPE(fault));
309 		}
310 	}
311 }
312 
313 static void xehp_check_faults(struct intel_gt *gt)
314 {
315 	u32 fault;
316 
317 	/*
318 	 * Although the fault register now lives in an MCR register range,
319 	 * the GAM registers are special and we only truly need to read
320 	 * the "primary" GAM instance rather than handling each instance
321 	 * individually.  intel_gt_mcr_read_any() will automatically steer
322 	 * toward the primary instance.
323 	 */
324 	fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
325 	if (fault & RING_FAULT_VALID) {
326 		u32 fault_data0, fault_data1;
327 		u64 fault_addr;
328 
329 		fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
330 		fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
331 
332 		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
333 			     ((u64)fault_data0 << 12);
334 
335 		drm_dbg(&gt->i915->drm, "Unexpected fault\n"
336 			"\tAddr: 0x%08x_%08x\n"
337 			"\tAddress space: %s\n"
338 			"\tEngine ID: %d\n"
339 			"\tSource ID: %d\n"
340 			"\tType: %d\n",
341 			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
342 			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
343 			GEN8_RING_FAULT_ENGINE_ID(fault),
344 			RING_FAULT_SRCID(fault),
345 			RING_FAULT_FAULT_TYPE(fault));
346 	}
347 }
348 
349 static void gen8_check_faults(struct intel_gt *gt)
350 {
351 	struct intel_uncore *uncore = gt->uncore;
352 	i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
353 	u32 fault;
354 
355 	if (GRAPHICS_VER(gt->i915) >= 12) {
356 		fault_reg = GEN12_RING_FAULT_REG;
357 		fault_data0_reg = GEN12_FAULT_TLB_DATA0;
358 		fault_data1_reg = GEN12_FAULT_TLB_DATA1;
359 	} else {
360 		fault_reg = GEN8_RING_FAULT_REG;
361 		fault_data0_reg = GEN8_FAULT_TLB_DATA0;
362 		fault_data1_reg = GEN8_FAULT_TLB_DATA1;
363 	}
364 
365 	fault = intel_uncore_read(uncore, fault_reg);
366 	if (fault & RING_FAULT_VALID) {
367 		u32 fault_data0, fault_data1;
368 		u64 fault_addr;
369 
370 		fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
371 		fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
372 
373 		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
374 			     ((u64)fault_data0 << 12);
375 
376 		drm_dbg(&uncore->i915->drm, "Unexpected fault\n"
377 			"\tAddr: 0x%08x_%08x\n"
378 			"\tAddress space: %s\n"
379 			"\tEngine ID: %d\n"
380 			"\tSource ID: %d\n"
381 			"\tType: %d\n",
382 			upper_32_bits(fault_addr), lower_32_bits(fault_addr),
383 			fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
384 			GEN8_RING_FAULT_ENGINE_ID(fault),
385 			RING_FAULT_SRCID(fault),
386 			RING_FAULT_FAULT_TYPE(fault));
387 	}
388 }
389 
390 void intel_gt_check_and_clear_faults(struct intel_gt *gt)
391 {
392 	struct drm_i915_private *i915 = gt->i915;
393 
394 	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
395 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
396 		xehp_check_faults(gt);
397 	else if (GRAPHICS_VER(i915) >= 8)
398 		gen8_check_faults(gt);
399 	else if (GRAPHICS_VER(i915) >= 6)
400 		gen6_check_faults(gt);
401 	else
402 		return;
403 
404 	intel_gt_clear_error_registers(gt, ALL_ENGINES);
405 }
406 
407 void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
408 {
409 	struct intel_uncore *uncore = gt->uncore;
410 	intel_wakeref_t wakeref;
411 
412 	/*
413 	 * No actual flushing is required for the GTT write domain for reads
414 	 * from the GTT domain. Writes to it "immediately" go to main memory
415 	 * as far as we know, so there's no chipset flush. It also doesn't
416 	 * land in the GPU render cache.
417 	 *
418 	 * However, we do have to enforce the order so that all writes through
419 	 * the GTT land before any writes to the device, such as updates to
420 	 * the GATT itself.
421 	 *
422 	 * We also have to wait a bit for the writes to land from the GTT.
423 	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
424 	 * timing. This issue has only been observed when switching quickly
425 	 * between GTT writes and CPU reads from inside the kernel on recent hw,
426 	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
427 	 * system agents we could not reproduce this behaviour until
428 	 * Cannonlake came along).
429 	 */
430 
431 	wmb();
432 
433 	if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
434 		return;
435 
436 	intel_gt_chipset_flush(gt);
437 
438 	with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
439 		unsigned long flags;
440 
441 		spin_lock_irqsave(&uncore->lock, flags);
442 		intel_uncore_posting_read_fw(uncore,
443 					     RING_HEAD(RENDER_RING_BASE));
444 		spin_unlock_irqrestore(&uncore->lock, flags);
445 	}
446 }
447 
448 void intel_gt_chipset_flush(struct intel_gt *gt)
449 {
450 	wmb();
451 	if (GRAPHICS_VER(gt->i915) < 6)
452 		intel_ggtt_gmch_flush();
453 }
454 
455 void intel_gt_driver_register(struct intel_gt *gt)
456 {
457 	intel_gsc_init(&gt->gsc, gt->i915);
458 
459 	intel_rps_driver_register(&gt->rps);
460 
461 	intel_gt_debugfs_register(gt);
462 	intel_gt_sysfs_register(gt);
463 }
464 
465 static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
466 {
467 	struct drm_i915_private *i915 = gt->i915;
468 	struct drm_i915_gem_object *obj;
469 	struct i915_vma *vma;
470 	int ret;
471 
472 	obj = i915_gem_object_create_lmem(i915, size,
473 					  I915_BO_ALLOC_VOLATILE |
474 					  I915_BO_ALLOC_GPU_ONLY);
475 	if (IS_ERR(obj))
476 		obj = i915_gem_object_create_stolen(i915, size);
477 	if (IS_ERR(obj))
478 		obj = i915_gem_object_create_internal(i915, size);
479 	if (IS_ERR(obj)) {
480 		drm_err(&i915->drm, "Failed to allocate scratch page\n");
481 		return PTR_ERR(obj);
482 	}
483 
484 	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
485 	if (IS_ERR(vma)) {
486 		ret = PTR_ERR(vma);
487 		goto err_unref;
488 	}
489 
490 	ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
491 	if (ret)
492 		goto err_unref;
493 
494 	gt->scratch = i915_vma_make_unshrinkable(vma);
495 
496 	return 0;
497 
498 err_unref:
499 	i915_gem_object_put(obj);
500 	return ret;
501 }
502 
503 static void intel_gt_fini_scratch(struct intel_gt *gt)
504 {
505 	i915_vma_unpin_and_release(&gt->scratch, 0);
506 }
507 
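/*
 * Pick the address space to install as gt->vm: a full PPGTT where the
 * platform supports one, otherwise a reference to the global GTT.
 */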
508 static struct i915_address_space *kernel_vm(struct intel_gt *gt)
509 {
510 	if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
511 		return &i915_ppgtt_create(gt, I915_BO_ALLOC_PM_EARLY)->vm;
512 	else
513 		return i915_vm_get(&gt->ggtt->vm);
514 }
515 
516 static int __engines_record_defaults(struct intel_gt *gt)
517 {
518 	struct i915_request *requests[I915_NUM_ENGINES] = {};
519 	struct intel_engine_cs *engine;
520 	enum intel_engine_id id;
521 	int err = 0;
522 
523 	/*
524 	 * As we reset the GPU during very early sanitisation, the current
525 	 * register state on the GPU should reflect its default values.
526 	 * We load a context onto the hw (with restore-inhibit), then switch
527 	 * over to a second context to save that default register state. We
528 	 * can then prime every new context with that state so they all start
529 	 * from the same default HW values.
530 	 */
531 
532 	for_each_engine(engine, gt, id) {
533 		struct intel_renderstate so;
534 		struct intel_context *ce;
535 		struct i915_request *rq;
536 
537 		/* We must be able to switch to something! */
538 		GEM_BUG_ON(!engine->kernel_context);
539 
540 		ce = intel_context_create(engine);
541 		if (IS_ERR(ce)) {
542 			err = PTR_ERR(ce);
543 			goto out;
544 		}
545 
546 		err = intel_renderstate_init(&so, ce);
547 		if (err)
548 			goto err;
549 
550 		rq = i915_request_create(ce);
551 		if (IS_ERR(rq)) {
552 			err = PTR_ERR(rq);
553 			goto err_fini;
554 		}
555 
556 		err = intel_engine_emit_ctx_wa(rq);
557 		if (err)
558 			goto err_rq;
559 
560 		err = intel_renderstate_emit(&so, rq);
561 		if (err)
562 			goto err_rq;
563 
564 err_rq:
565 		requests[id] = i915_request_get(rq);
566 		i915_request_add(rq);
567 err_fini:
568 		intel_renderstate_fini(&so, ce);
569 err:
570 		if (err) {
571 			intel_context_put(ce);
572 			goto out;
573 		}
574 	}
575 
576 	/* Flush the default context image to memory, and enable powersaving. */
577 	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
578 		err = -EIO;
579 		goto out;
580 	}
581 
582 	for (id = 0; id < ARRAY_SIZE(requests); id++) {
583 		struct i915_request *rq;
584 		struct file *state;
585 
586 		rq = requests[id];
587 		if (!rq)
588 			continue;
589 
590 		if (rq->fence.error) {
591 			err = -EIO;
592 			goto out;
593 		}
594 
595 		GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
596 		if (!rq->context->state)
597 			continue;
598 
599 		/* Keep a copy of the state's backing pages; free the obj */
600 		state = shmem_create_from_object(rq->context->state->obj);
601 		if (IS_ERR(state)) {
602 			err = PTR_ERR(state);
603 			goto out;
604 		}
605 		rq->engine->default_state = state;
606 	}
607 
608 out:
609 	/*
610 	 * If we have to abandon now, we expect the engines to be idle
611 	 * and ready to be torn down. The quickest way we can accomplish
612 	 * this is by declaring ourselves wedged.
613 	 */
614 	if (err)
615 		intel_gt_set_wedged(gt);
616 
617 	for (id = 0; id < ARRAY_SIZE(requests); id++) {
618 		struct intel_context *ce;
619 		struct i915_request *rq;
620 
621 		rq = requests[id];
622 		if (!rq)
623 			continue;
624 
625 		ce = rq->context;
626 		i915_request_put(rq);
627 		intel_context_put(ce);
628 	}
629 	return err;
630 }
631 
632 static int __engines_verify_workarounds(struct intel_gt *gt)
633 {
634 	struct intel_engine_cs *engine;
635 	enum intel_engine_id id;
636 	int err = 0;
637 
638 	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
639 		return 0;
640 
641 	for_each_engine(engine, gt, id) {
642 		if (intel_engine_verify_workarounds(engine, "load"))
643 			err = -EIO;
644 	}
645 
646 	/* Flush and restore the kernel context for safety */
647 	if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME)
648 		err = -EIO;
649 
650 	return err;
651 }
652 
653 static void __intel_gt_disable(struct intel_gt *gt)
654 {
655 	intel_gt_set_wedged_on_fini(gt);
656 
657 	intel_gt_suspend_prepare(gt);
658 	intel_gt_suspend_late(gt);
659 
660 	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
661 }
662 
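/*
 * Wait for all outstanding requests (including those tracked by the
 * microcontrollers) to be retired. Returns 0 once idle, or a negative error
 * code such as -EINTR (signal pending) or -ETIME (timeout expired).
 */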
663 int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
664 {
665 	long remaining_timeout;
666 
667 	/* If the device is asleep, we have no requests outstanding */
668 	if (!intel_gt_pm_is_awake(gt))
669 		return 0;
670 
671 	while ((timeout = intel_gt_retire_requests_timeout(gt, timeout,
672 							   &remaining_timeout)) > 0) {
673 		cond_resched();
674 		if (signal_pending(current))
675 			return -EINTR;
676 	}
677 
678 	return timeout ? timeout : intel_uc_wait_for_idle(&gt->uc,
679 							  remaining_timeout);
680 }
681 
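/*
 * Full GT initialisation: scratch page, GT power management, the kernel
 * address space, engines and uC firmware, followed by recording the default
 * context image and verifying the workarounds. Any failure wedges the GT.
 */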
682 int intel_gt_init(struct intel_gt *gt)
683 {
684 	int err;
685 
686 	err = i915_inject_probe_error(gt->i915, -ENODEV);
687 	if (err)
688 		return err;
689 
690 	intel_gt_init_workarounds(gt);
691 
692 	/*
693 	 * This is just a security blanket to placate dragons.
694 	 * On some systems, we very sporadically observe that the first TLBs
695 	 * used by the CS may be stale, despite us poking the TLB reset. If
696 	 * we hold the forcewake during initialisation these problems
697 	 * just magically go away.
698 	 */
699 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
700 
701 	err = intel_gt_init_scratch(gt,
702 				    GRAPHICS_VER(gt->i915) == 2 ? SZ_256K : SZ_4K);
703 	if (err)
704 		goto out_fw;
705 
706 	intel_gt_pm_init(gt);
707 
708 	gt->vm = kernel_vm(gt);
709 	if (!gt->vm) {
710 		err = -ENOMEM;
711 		goto err_pm;
712 	}
713 
714 	intel_set_mocs_index(gt);
715 
716 	err = intel_engines_init(gt);
717 	if (err)
718 		goto err_engines;
719 
720 	err = intel_uc_init(&gt->uc);
721 	if (err)
722 		goto err_engines;
723 
724 	err = intel_gt_resume(gt);
725 	if (err)
726 		goto err_uc_init;
727 
728 	err = intel_gt_init_hwconfig(gt);
729 	if (err)
730 		drm_err(&gt->i915->drm, "Failed to retrieve hwconfig table: %pe\n",
731 			ERR_PTR(err));
732 
733 	err = __engines_record_defaults(gt);
734 	if (err)
735 		goto err_gt;
736 
737 	err = __engines_verify_workarounds(gt);
738 	if (err)
739 		goto err_gt;
740 
741 	intel_uc_init_late(&gt->uc);
742 
743 	err = i915_inject_probe_error(gt->i915, -EIO);
744 	if (err)
745 		goto err_gt;
746 
747 	intel_migrate_init(&gt->migrate, gt);
748 
749 	intel_pxp_init(&gt->pxp);
750 
751 	goto out_fw;
752 err_gt:
753 	__intel_gt_disable(gt);
754 	intel_uc_fini_hw(&gt->uc);
755 err_uc_init:
756 	intel_uc_fini(&gt->uc);
757 err_engines:
758 	intel_engines_release(gt);
759 	i915_vm_put(fetch_and_zero(&gt->vm));
760 err_pm:
761 	intel_gt_pm_fini(gt);
762 	intel_gt_fini_scratch(gt);
763 out_fw:
764 	if (err)
765 		intel_gt_set_wedged_on_init(gt);
766 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
767 	return err;
768 }
769 
770 void intel_gt_driver_remove(struct intel_gt *gt)
771 {
772 	__intel_gt_disable(gt);
773 
774 	intel_migrate_fini(&gt->migrate);
775 	intel_uc_driver_remove(&gt->uc);
776 
777 	intel_engines_release(gt);
778 
779 	intel_gt_flush_buffer_pool(gt);
780 }
781 
782 void intel_gt_driver_unregister(struct intel_gt *gt)
783 {
784 	intel_wakeref_t wakeref;
785 
786 	intel_gt_sysfs_unregister(gt);
787 	intel_rps_driver_unregister(&gt->rps);
788 	intel_gsc_fini(&gt->gsc);
789 
790 	intel_pxp_fini(&gt->pxp);
791 
792 	/*
793 	 * Upon unregistering the device (to prevent any new users), cancel
794 	 * all in-flight requests so that we can quickly unbind the active
795 	 * resources.
796 	 */
797 	intel_gt_set_wedged_on_fini(gt);
798 
799 	/* Scrub all HW state upon release */
800 	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
801 		__intel_gt_reset(gt, ALL_ENGINES);
802 }
803 
804 void intel_gt_driver_release(struct intel_gt *gt)
805 {
806 	struct i915_address_space *vm;
807 
808 	vm = fetch_and_zero(&gt->vm);
809 	if (vm) /* FIXME being called twice on error paths :( */
810 		i915_vm_put(vm);
811 
812 	intel_wa_list_free(&gt->wa_list);
813 	intel_gt_pm_fini(gt);
814 	intel_gt_fini_scratch(gt);
815 	intel_gt_fini_buffer_pool(gt);
816 	intel_gt_fini_hwconfig(gt);
817 }
818 
819 void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
820 {
821 	struct intel_gt *gt;
822 	unsigned int id;
823 
824 	/* We need to wait for in-flight RCU frees to release their grip */
825 	rcu_barrier();
826 
827 	for_each_gt(gt, i915, id) {
828 		intel_uc_driver_late_release(&gt->uc);
829 		intel_gt_fini_requests(gt);
830 		intel_gt_fini_reset(gt);
831 		intel_gt_fini_timelines(gt);
832 		mutex_destroy(&gt->tlb.invalidate_lock);
833 		intel_engines_free(gt);
834 	}
835 }
836 
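/*
 * Per-tile setup: non-root tiles allocate their own uncore and irq_lock and
 * run the common early init, then every tile maps its MMIO window at the
 * given physical address.
 */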
837 static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
838 {
839 	int ret;
840 
841 	if (!gt_is_root(gt)) {
842 		struct intel_uncore *uncore;
843 		spinlock_t *irq_lock;
844 
845 		uncore = drmm_kzalloc(&gt->i915->drm, sizeof(*uncore), GFP_KERNEL);
846 		if (!uncore)
847 			return -ENOMEM;
848 
849 		irq_lock = drmm_kzalloc(&gt->i915->drm, sizeof(*irq_lock), GFP_KERNEL);
850 		if (!irq_lock)
851 			return -ENOMEM;
852 
853 		gt->uncore = uncore;
854 		gt->irq_lock = irq_lock;
855 
856 		intel_gt_common_init_early(gt);
857 	}
858 
859 	intel_uncore_init_early(gt->uncore, gt);
860 
861 	ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
862 	if (ret)
863 		return ret;
864 
865 	gt->phys_addr = phys_addr;
866 
867 	return 0;
868 }
869 
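/*
 * Discover every GT on the device: the primary GT embedded in struct
 * drm_i915_private, plus any extra GTs (remote tiles or a standalone media
 * GT) listed in the platform's extra_gt_list.
 */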
870 int intel_gt_probe_all(struct drm_i915_private *i915)
871 {
872 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
873 	struct intel_gt *gt = &i915->gt0;
874 	const struct intel_gt_definition *gtdef;
875 	phys_addr_t phys_addr;
876 	unsigned int mmio_bar;
877 	unsigned int i;
878 	int ret;
879 
880 	mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
881 	phys_addr = pci_resource_start(pdev, mmio_bar);
882 
883 	/*
884 	 * We always have at least one primary GT on any device
885 	 * and it has already been initialized early during probe
886 	 * in i915_driver_probe().
887 	 */
888 	gt->i915 = i915;
889 	gt->name = "Primary GT";
890 	gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
891 
892 	drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
893 	ret = intel_gt_tile_setup(gt, phys_addr);
894 	if (ret)
895 		return ret;
896 
897 	i915->gt[0] = gt;
898 
899 	if (!HAS_EXTRA_GT_LIST(i915))
900 		return 0;
901 
902 	for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
903 	     gtdef->name != NULL;
904 	     i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
905 		gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
906 		if (!gt) {
907 			ret = -ENOMEM;
908 			goto err;
909 		}
910 
911 		gt->i915 = i915;
912 		gt->name = gtdef->name;
913 		gt->type = gtdef->type;
914 		gt->info.engine_mask = gtdef->engine_mask;
915 		gt->info.id = i;
916 
917 		drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
918 		if (GEM_WARN_ON(range_overflows_t(resource_size_t,
919 						  gtdef->mapping_base,
920 						  SZ_16M,
921 						  pci_resource_len(pdev, mmio_bar)))) {
922 			ret = -ENODEV;
923 			goto err;
924 		}
925 
926 		switch (gtdef->type) {
927 		case GT_TILE:
928 			ret = intel_gt_tile_setup(gt, phys_addr + gtdef->mapping_base);
929 			break;
930 
931 		case GT_MEDIA:
932 			ret = intel_sa_mediagt_setup(gt, phys_addr + gtdef->mapping_base,
933 						     gtdef->gsi_offset);
934 			break;
935 
936 		case GT_PRIMARY:
937 			/* Primary GT should not appear in extra GT list */
938 		default:
939 			MISSING_CASE(gtdef->type);
940 			ret = -ENODEV;
941 		}
942 
943 		if (ret)
944 			goto err;
945 
946 		i915->gt[i] = gt;
947 	}
948 
949 	return 0;
950 
951 err:
952 	i915_probe_error(i915, "Failed to initialize %s! (%d)\n", gtdef->name, ret);
953 	intel_gt_release_all(i915);
954 
955 	return ret;
956 }
957 
958 int intel_gt_tiles_init(struct drm_i915_private *i915)
959 {
960 	struct intel_gt *gt;
961 	unsigned int id;
962 	int ret;
963 
964 	for_each_gt(gt, i915, id) {
965 		ret = intel_gt_probe_lmem(gt);
966 		if (ret)
967 			return ret;
968 	}
969 
970 	return 0;
971 }
972 
973 void intel_gt_release_all(struct drm_i915_private *i915)
974 {
975 	struct intel_gt *gt;
976 	unsigned int id;
977 
978 	for_each_gt(gt, i915, id)
979 		i915->gt[id] = NULL;
980 }
981 
982 void intel_gt_info_print(const struct intel_gt_info *info,
983 			 struct drm_printer *p)
984 {
985 	drm_printf(p, "available engines: %x\n", info->engine_mask);
986 
987 	intel_sseu_dump(&info->sseu, p);
988 }
989 
990 struct reg_and_bit {
991 	union {
992 		i915_reg_t reg;
993 		i915_mcr_reg_t mcr_reg;
994 	};
995 	u32 bit;
996 };
997 
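/*
 * Look up the legacy (pre-Xe_HP) TLB invalidation register and the bit to
 * write for a given engine. On gen8 each video decode engine has its own
 * register (GEN8_M1TCR/GEN8_M2TCR) rather than its own bit.
 */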
998 static struct reg_and_bit
999 get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
1000 		const i915_reg_t *regs, const unsigned int num)
1001 {
1002 	const unsigned int class = engine->class;
1003 	struct reg_and_bit rb = { };
1004 
1005 	if (drm_WARN_ON_ONCE(&engine->i915->drm,
1006 			     class >= num || !regs[class].reg))
1007 		return rb;
1008 
1009 	rb.reg = regs[class];
1010 	if (gen8 && class == VIDEO_DECODE_CLASS)
1011 		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
1012 	else
1013 		rb.bit = engine->instance;
1014 
1015 	rb.bit = BIT(rb.bit);
1016 
1017 	return rb;
1018 }
1019 
1020 /*
1021  * HW architecture suggests a typical invalidation time of 40us,
1022  * with pessimistic cases up to 100us and a recommendation to
1023  * cap at 1ms. We go a bit higher just in case.
1024  */
1025 #define TLB_INVAL_TIMEOUT_US 100
1026 #define TLB_INVAL_TIMEOUT_MS 4
1027 
1028 /*
1029  * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
1030  * but are now considered MCR registers.  Since they exist within a GAM range,
1031  * the primary instance of the register rolls up the status from each unit.
1032  */
1033 static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
1034 {
1035 	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
1036 		return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
1037 						    TLB_INVAL_TIMEOUT_US,
1038 						    TLB_INVAL_TIMEOUT_MS);
1039 	else
1040 		return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
1041 						    TLB_INVAL_TIMEOUT_US,
1042 						    TLB_INVAL_TIMEOUT_MS,
1043 						    NULL);
1044 }
1045 
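/*
 * Trigger a full TLB invalidation on every awake engine via MMIO and wait
 * for each one to complete, holding forcewake and serialising against GT
 * reset via the uncore lock.
 */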
1046 static void mmio_invalidate_full(struct intel_gt *gt)
1047 {
1048 	static const i915_reg_t gen8_regs[] = {
1049 		[RENDER_CLASS]			= GEN8_RTCR,
1050 		[VIDEO_DECODE_CLASS]		= GEN8_M1TCR, /* , GEN8_M2TCR */
1051 		[VIDEO_ENHANCEMENT_CLASS]	= GEN8_VTCR,
1052 		[COPY_ENGINE_CLASS]		= GEN8_BTCR,
1053 	};
1054 	static const i915_reg_t gen12_regs[] = {
1055 		[RENDER_CLASS]			= GEN12_GFX_TLB_INV_CR,
1056 		[VIDEO_DECODE_CLASS]		= GEN12_VD_TLB_INV_CR,
1057 		[VIDEO_ENHANCEMENT_CLASS]	= GEN12_VE_TLB_INV_CR,
1058 		[COPY_ENGINE_CLASS]		= GEN12_BLT_TLB_INV_CR,
1059 		[COMPUTE_CLASS]			= GEN12_COMPCTX_TLB_INV_CR,
1060 	};
1061 	static const i915_mcr_reg_t xehp_regs[] = {
1062 		[RENDER_CLASS]			= XEHP_GFX_TLB_INV_CR,
1063 		[VIDEO_DECODE_CLASS]		= XEHP_VD_TLB_INV_CR,
1064 		[VIDEO_ENHANCEMENT_CLASS]	= XEHP_VE_TLB_INV_CR,
1065 		[COPY_ENGINE_CLASS]		= XEHP_BLT_TLB_INV_CR,
1066 		[COMPUTE_CLASS]			= XEHP_COMPCTX_TLB_INV_CR,
1067 	};
1068 	struct drm_i915_private *i915 = gt->i915;
1069 	struct intel_uncore *uncore = gt->uncore;
1070 	struct intel_engine_cs *engine;
1071 	intel_engine_mask_t awake, tmp;
1072 	enum intel_engine_id id;
1073 	const i915_reg_t *regs;
1074 	unsigned int num = 0;
1075 
1076 	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
1077 		regs = NULL;
1078 		num = ARRAY_SIZE(xehp_regs);
1079 	} else if (GRAPHICS_VER(i915) == 12) {
1080 		regs = gen12_regs;
1081 		num = ARRAY_SIZE(gen12_regs);
1082 	} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
1083 		regs = gen8_regs;
1084 		num = ARRAY_SIZE(gen8_regs);
1085 	} else if (GRAPHICS_VER(i915) < 8) {
1086 		return;
1087 	}
1088 
1089 	if (drm_WARN_ONCE(&i915->drm, !num,
1090 			  "Platform does not implement TLB invalidation!"))
1091 		return;
1092 
1093 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
1094 
1095 	spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
1096 
1097 	awake = 0;
1098 	for_each_engine(engine, gt, id) {
1099 		struct reg_and_bit rb;
1100 
1101 		if (!intel_engine_pm_is_awake(engine))
1102 			continue;
1103 
1104 		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
1105 			intel_gt_mcr_multicast_write_fw(gt,
1106 							xehp_regs[engine->class],
1107 							BIT(engine->instance));
1108 		} else {
1109 			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
1110 			if (!i915_mmio_reg_offset(rb.reg))
1111 				continue;
1112 
1113 			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
1114 		}
1115 		awake |= engine->mask;
1116 	}
1117 
1118 	GT_TRACE(gt, "invalidated engines %08x\n", awake);
1119 
1120 	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
1121 	if (awake &&
1122 	    (IS_TIGERLAKE(i915) ||
1123 	     IS_DG1(i915) ||
1124 	     IS_ROCKETLAKE(i915) ||
1125 	     IS_ALDERLAKE_S(i915) ||
1126 	     IS_ALDERLAKE_P(i915)))
1127 		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
1128 
1129 	spin_unlock_irq(&uncore->lock);
1130 
1131 	for_each_engine_masked(engine, gt, awake, tmp) {
1132 		struct reg_and_bit rb;
1133 
1134 		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
1135 			rb.mcr_reg = xehp_regs[engine->class];
1136 			rb.bit = BIT(engine->instance);
1137 		} else {
1138 			rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
1139 		}
1140 
1141 		if (wait_for_invalidate(gt, rb))
1142 			drm_err_ratelimited(&gt->i915->drm,
1143 					    "%s TLB invalidation did not complete in %ums!\n",
1144 					    engine->name, TLB_INVAL_TIMEOUT_MS);
1145 	}
1146 
1147 	/*
1148 	 * Use delayed put since a) we mostly expect a flurry of TLB
1149 	 * invalidations so it is good to avoid paying the forcewake cost and
1150 	 * b) it works around a bug in Icelake which cannot cope with too rapid
1151 	 * transitions.
1152 	 */
1153 	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
1154 }
1155 
1156 static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
1157 {
1158 	u32 cur = intel_gt_tlb_seqno(gt);
1159 
1160 	/* Only skip if a *full* TLB invalidate barrier has passed */
1161 	return (s32)(cur - ALIGN(seqno, 2)) > 0;
1162 }
1163 
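/*
 * Flush the TLBs for the invalidation request identified by @seqno, unless
 * a full invalidation has already completed since that seqno was assigned
 * (see tlb_seqno_passed()). Nothing to do if the GT is wedged or not awake.
 */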
1164 void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
1165 {
1166 	intel_wakeref_t wakeref;
1167 
1168 	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
1169 		return;
1170 
1171 	if (intel_gt_is_wedged(gt))
1172 		return;
1173 
1174 	if (tlb_seqno_passed(gt, seqno))
1175 		return;
1176 
1177 	with_intel_gt_pm_if_awake(gt, wakeref) {
1178 		mutex_lock(&gt->tlb.invalidate_lock);
1179 		if (tlb_seqno_passed(gt, seqno))
1180 			goto unlock;
1181 
1182 		mmio_invalidate_full(gt);
1183 
1184 		write_seqcount_invalidate(&gt->tlb.seqno);
1185 unlock:
1186 		mutex_unlock(&gt->tlb.invalidate_lock);
1187 	}
1188 }
1189