xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_reset.c (revision fcbd8037f7df694aa7bfb7ce82c0c7f5e53e7b7b)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008-2018 Intel Corporation
5  */
6 
7 #include <linux/sched/mm.h>
8 #include <linux/stop_machine.h>
9 
10 #include "display/intel_display_types.h"
11 #include "display/intel_overlay.h"
12 
13 #include "gem/i915_gem_context.h"
14 
15 #include "i915_drv.h"
16 #include "i915_gpu_error.h"
17 #include "i915_irq.h"
18 #include "intel_engine_pm.h"
19 #include "intel_gt.h"
20 #include "intel_gt_pm.h"
21 #include "intel_reset.h"
22 
23 #include "uc/intel_guc.h"
24 
25 #define RESET_MAX_RETRIES 3
26 
27 /* XXX How to handle concurrent GGTT updates using tiling registers? */
28 #define RESET_UNDER_STOP_MACHINE 0
29 
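/*
 * Small RMW helpers using the _fw (forcewake-bypassing) MMIO accessors:
 * the reset paths below hold FORCEWAKE_ALL explicitly and may run with
 * preemption disabled, so the heavyweight accessors are deliberately
 * avoided here.
 */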
30 static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
31 {
32 	intel_uncore_rmw_fw(uncore, reg, 0, set);
33 }
34 
35 static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
36 {
37 	intel_uncore_rmw_fw(uncore, reg, clr, 0);
38 }
39 
40 static void engine_skip_context(struct i915_request *rq)
41 {
42 	struct intel_engine_cs *engine = rq->engine;
43 	struct i915_gem_context *hung_ctx = rq->gem_context;
44 
45 	if (!i915_request_is_active(rq))
46 		return;
47 
48 	lockdep_assert_held(&engine->active.lock);
49 	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
50 		if (rq->gem_context == hung_ctx)
51 			i915_request_skip(rq, -EIO);
52 }
53 
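/*
 * Propagate the hang onto the owning client: a hang by an already banned
 * context, or hangs in rapid succession (within
 * I915_CLIENT_FAST_HANG_JIFFIES), add to the per-file ban score that is
 * consulted elsewhere in the driver.
 */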
54 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
55 			       const struct i915_gem_context *ctx)
56 {
57 	unsigned int score;
58 	unsigned long prev_hang;
59 
60 	if (i915_gem_context_is_banned(ctx))
61 		score = I915_CLIENT_SCORE_CONTEXT_BAN;
62 	else
63 		score = 0;
64 
65 	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
66 	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
67 		score += I915_CLIENT_SCORE_HANG_FAST;
68 
69 	if (score) {
70 		atomic_add(score, &file_priv->ban_score);
71 
72 		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
73 				 ctx->name, score,
74 				 atomic_read(&file_priv->ban_score));
75 	}
76 }
77 
78 static bool context_mark_guilty(struct i915_gem_context *ctx)
79 {
80 	unsigned long prev_hang;
81 	bool banned;
82 	int i;
83 
84 	atomic_inc(&ctx->guilty_count);
85 
86 	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
87 	if (!i915_gem_context_is_bannable(ctx))
88 		return false;
89 
90 	/* Record the timestamp for the last N hangs */
91 	prev_hang = ctx->hang_timestamp[0];
92 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
93 		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
94 	ctx->hang_timestamp[i] = jiffies;
95 
96 	/* If we have hung N+1 times in rapid succession, we ban the context! */
97 	banned = !i915_gem_context_is_recoverable(ctx);
98 	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
99 		banned = true;
100 	if (banned) {
101 		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
102 				 ctx->name, atomic_read(&ctx->guilty_count));
103 		i915_gem_context_set_banned(ctx);
104 	}
105 
106 	if (!IS_ERR_OR_NULL(ctx->file_priv))
107 		client_mark_guilty(ctx->file_priv, ctx);
108 
109 	return banned;
110 }
111 
112 static void context_mark_innocent(struct i915_gem_context *ctx)
113 {
114 	atomic_inc(&ctx->active_count);
115 }
116 
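/*
 * Record the verdict for the request found at the point of reset: a guilty
 * request is cancelled with -EIO (and the rest of its context skipped if
 * the context ends up banned), while an innocent request has its fence
 * error set to -EAGAIN and is left to be replayed once the engine is
 * running again.
 */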
117 void __i915_request_reset(struct i915_request *rq, bool guilty)
118 {
119 	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
120 		  rq->engine->name,
121 		  rq->fence.context,
122 		  rq->fence.seqno,
123 		  yesno(guilty));
124 
125 	GEM_BUG_ON(i915_request_completed(rq));
126 
127 	if (guilty) {
128 		i915_request_skip(rq, -EIO);
129 		if (context_mark_guilty(rq->gem_context))
130 			engine_skip_context(rq);
131 	} else {
132 		dma_fence_set_error(&rq->fence, -EAGAIN);
133 		context_mark_innocent(rq->gem_context);
134 	}
135 }
136 
137 static bool i915_in_reset(struct pci_dev *pdev)
138 {
139 	u8 gdrst;
140 
141 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
142 	return gdrst & GRDOM_RESET_STATUS;
143 }
144 
145 static int i915_do_reset(struct intel_gt *gt,
146 			 intel_engine_mask_t engine_mask,
147 			 unsigned int retry)
148 {
149 	struct pci_dev *pdev = gt->i915->drm.pdev;
150 	int err;
151 
152 	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
153 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
154 	udelay(50);
155 	err = wait_for_atomic(i915_in_reset(pdev), 50);
156 
157 	/* Clear the reset request. */
158 	pci_write_config_byte(pdev, I915_GDRST, 0);
159 	udelay(50);
160 	if (!err)
161 		err = wait_for_atomic(!i915_in_reset(pdev), 50);
162 
163 	return err;
164 }
165 
166 static bool g4x_reset_complete(struct pci_dev *pdev)
167 {
168 	u8 gdrst;
169 
170 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
171 	return (gdrst & GRDOM_RESET_ENABLE) == 0;
172 }
173 
174 static int g33_do_reset(struct intel_gt *gt,
175 			intel_engine_mask_t engine_mask,
176 			unsigned int retry)
177 {
178 	struct pci_dev *pdev = gt->i915->drm.pdev;
179 
180 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
181 	return wait_for_atomic(g4x_reset_complete(pdev), 50);
182 }
183 
184 static int g4x_do_reset(struct intel_gt *gt,
185 			intel_engine_mask_t engine_mask,
186 			unsigned int retry)
187 {
188 	struct pci_dev *pdev = gt->i915->drm.pdev;
189 	struct intel_uncore *uncore = gt->uncore;
190 	int ret;
191 
192 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
193 	rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
194 	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
195 
196 	pci_write_config_byte(pdev, I915_GDRST,
197 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
198 	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
199 	if (ret) {
200 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
201 		goto out;
202 	}
203 
204 	pci_write_config_byte(pdev, I915_GDRST,
205 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
206 	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
207 	if (ret) {
208 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
209 		goto out;
210 	}
211 
212 out:
213 	pci_write_config_byte(pdev, I915_GDRST, 0);
214 
215 	rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
216 	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
217 
218 	return ret;
219 }
220 
221 static int ironlake_do_reset(struct intel_gt *gt,
222 			     intel_engine_mask_t engine_mask,
223 			     unsigned int retry)
224 {
225 	struct intel_uncore *uncore = gt->uncore;
226 	int ret;
227 
228 	intel_uncore_write_fw(uncore, ILK_GDSR,
229 			      ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
230 	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
231 					   ILK_GRDOM_RESET_ENABLE, 0,
232 					   5000, 0,
233 					   NULL);
234 	if (ret) {
235 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
236 		goto out;
237 	}
238 
239 	intel_uncore_write_fw(uncore, ILK_GDSR,
240 			      ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
241 	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
242 					   ILK_GRDOM_RESET_ENABLE, 0,
243 					   5000, 0,
244 					   NULL);
245 	if (ret) {
246 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
247 		goto out;
248 	}
249 
250 out:
251 	intel_uncore_write_fw(uncore, ILK_GDSR, 0);
252 	intel_uncore_posting_read_fw(uncore, ILK_GDSR);
253 	return ret;
254 }
255 
256 /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
257 static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
258 {
259 	struct intel_uncore *uncore = gt->uncore;
260 	int err;
261 
262 	/*
263 	 * GEN6_GDRST is not in the gt power well, so there is no need to
264 	 * check for fifo space for the write or to forcewake the chip for
265 	 * the read.
266 	 */
267 	intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
268 
269 	/* Wait for the device to ack the reset requests */
270 	err = __intel_wait_for_register_fw(uncore,
271 					   GEN6_GDRST, hw_domain_mask, 0,
272 					   500, 0,
273 					   NULL);
274 	if (err)
275 		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
276 				 hw_domain_mask);
277 
278 	return err;
279 }
280 
281 static int gen6_reset_engines(struct intel_gt *gt,
282 			      intel_engine_mask_t engine_mask,
283 			      unsigned int retry)
284 {
285 	struct intel_engine_cs *engine;
286 	const u32 hw_engine_mask[] = {
287 		[RCS0]  = GEN6_GRDOM_RENDER,
288 		[BCS0]  = GEN6_GRDOM_BLT,
289 		[VCS0]  = GEN6_GRDOM_MEDIA,
290 		[VCS1]  = GEN8_GRDOM_MEDIA2,
291 		[VECS0] = GEN6_GRDOM_VECS,
292 	};
293 	u32 hw_mask;
294 
295 	if (engine_mask == ALL_ENGINES) {
296 		hw_mask = GEN6_GRDOM_FULL;
297 	} else {
298 		intel_engine_mask_t tmp;
299 
300 		hw_mask = 0;
301 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
302 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
303 			hw_mask |= hw_engine_mask[engine->id];
304 		}
305 	}
306 
307 	return gen6_hw_domain_reset(gt, hw_mask);
308 }
309 
310 static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
311 {
312 	struct intel_uncore *uncore = engine->uncore;
313 	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
314 	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
315 	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
316 	i915_reg_t sfc_usage;
317 	u32 sfc_usage_bit;
318 	u32 sfc_reset_bit;
319 
320 	switch (engine->class) {
321 	case VIDEO_DECODE_CLASS:
322 		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
323 			return 0;
324 
325 		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
326 		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
327 
328 		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
329 		sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;
330 
331 		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
332 		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
333 		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
334 		break;
335 
336 	case VIDEO_ENHANCEMENT_CLASS:
337 		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
338 		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
339 
340 		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
341 		sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;
342 
343 		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
344 		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
345 		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
346 		break;
347 
348 	default:
349 		return 0;
350 	}
351 
352 	/*
353 	 * Tell the engine that a software reset is going to happen. The engine
354 	 * will then try to force lock the SFC (if currently locked, it will
355 	 * remain so until we tell the engine it is safe to unlock; if currently
356 	 * unlocked, it will ignore this and all new lock requests). If SFC
357 	 * ends up being locked to the engine we want to reset, we have to reset
358 	 * it as well (we will unlock it once the reset sequence is completed).
359 	 */
360 	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
361 
362 	if (__intel_wait_for_register_fw(uncore,
363 					 sfc_forced_lock_ack,
364 					 sfc_forced_lock_ack_bit,
365 					 sfc_forced_lock_ack_bit,
366 					 1000, 0, NULL)) {
367 		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
368 		return 0;
369 	}
370 
371 	if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
372 		return sfc_reset_bit;
373 
374 	return 0;
375 }
376 
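/*
 * Release the forced SFC lock taken by gen11_lock_sfc() once the engine
 * reset has completed; a no-op for engines without (or without access to)
 * a shared SFC unit.
 */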
377 static void gen11_unlock_sfc(struct intel_engine_cs *engine)
378 {
379 	struct intel_uncore *uncore = engine->uncore;
380 	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
381 	i915_reg_t sfc_forced_lock;
382 	u32 sfc_forced_lock_bit;
383 
384 	switch (engine->class) {
385 	case VIDEO_DECODE_CLASS:
386 		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
387 			return;
388 
389 		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
390 		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
391 		break;
392 
393 	case VIDEO_ENHANCEMENT_CLASS:
394 		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
395 		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
396 		break;
397 
398 	default:
399 		return;
400 	}
401 
402 	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
403 }
404 
405 static int gen11_reset_engines(struct intel_gt *gt,
406 			       intel_engine_mask_t engine_mask,
407 			       unsigned int retry)
408 {
409 	const u32 hw_engine_mask[] = {
410 		[RCS0]  = GEN11_GRDOM_RENDER,
411 		[BCS0]  = GEN11_GRDOM_BLT,
412 		[VCS0]  = GEN11_GRDOM_MEDIA,
413 		[VCS1]  = GEN11_GRDOM_MEDIA2,
414 		[VCS2]  = GEN11_GRDOM_MEDIA3,
415 		[VCS3]  = GEN11_GRDOM_MEDIA4,
416 		[VECS0] = GEN11_GRDOM_VECS,
417 		[VECS1] = GEN11_GRDOM_VECS2,
418 	};
419 	struct intel_engine_cs *engine;
420 	intel_engine_mask_t tmp;
421 	u32 hw_mask;
422 	int ret;
423 
424 	if (engine_mask == ALL_ENGINES) {
425 		hw_mask = GEN11_GRDOM_FULL;
426 	} else {
427 		hw_mask = 0;
428 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
429 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
430 			hw_mask |= hw_engine_mask[engine->id];
431 			hw_mask |= gen11_lock_sfc(engine);
432 		}
433 	}
434 
435 	ret = gen6_hw_domain_reset(gt, hw_mask);
436 
437 	if (engine_mask != ALL_ENGINES)
438 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
439 			gen11_unlock_sfc(engine);
440 
441 	return ret;
442 }
443 
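/*
 * Gen8+ ready-for-reset handshake: request a reset via RING_RESET_CTL
 * (a masked register, so _MASKED_BIT_ENABLE() only touches the requested
 * bit) and wait for the engine to report READY_TO_RESET. The handshake is
 * bypassed for catastrophic errors, as the code below notes.
 */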
444 static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
445 {
446 	struct intel_uncore *uncore = engine->uncore;
447 	const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
448 	u32 request, mask, ack;
449 	int ret;
450 
451 	ack = intel_uncore_read_fw(uncore, reg);
452 	if (ack & RESET_CTL_CAT_ERROR) {
453 		/*
454 		 * For catastrophic errors, ready-for-reset sequence
455 		 * needs to be bypassed: HAS#396813
456 		 */
457 		request = RESET_CTL_CAT_ERROR;
458 		mask = RESET_CTL_CAT_ERROR;
459 
460 		/* Catastrophic errors need to be cleared by HW */
461 		ack = 0;
462 	} else if (!(ack & RESET_CTL_READY_TO_RESET)) {
463 		request = RESET_CTL_REQUEST_RESET;
464 		mask = RESET_CTL_READY_TO_RESET;
465 		ack = RESET_CTL_READY_TO_RESET;
466 	} else {
467 		return 0;
468 	}
469 
470 	intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
471 	ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
472 					   700, 0, NULL);
473 	if (ret)
474 		DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
475 			  engine->name, request,
476 			  intel_uncore_read_fw(uncore, reg));
477 
478 	return ret;
479 }
480 
481 static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
482 {
483 	intel_uncore_write_fw(engine->uncore,
484 			      RING_RESET_CTL(engine->mmio_base),
485 			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
486 }
487 
488 static int gen8_reset_engines(struct intel_gt *gt,
489 			      intel_engine_mask_t engine_mask,
490 			      unsigned int retry)
491 {
492 	struct intel_engine_cs *engine;
493 	const bool reset_non_ready = retry >= 1;
494 	intel_engine_mask_t tmp;
495 	int ret;
496 
497 	for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
498 		ret = gen8_engine_reset_prepare(engine);
499 		if (ret && !reset_non_ready)
500 			goto skip_reset;
501 
502 		/*
503 		 * If this is not the first failed attempt to prepare,
504 		 * we decide to proceed anyway.
505 		 *
506 		 * By doing so we risk context corruption and with
507 		 * some gens (kbl), possible system hang if reset
508 		 * happens during active bb execution.
509 		 *
510 		 * We would rather risk context corruption than fail the
511 		 * reset and wedge the driver/gpu. The active bb execution
512 		 * case should be covered by the stop_engines() we have
513 		 * before the reset.
514 		 */
515 	}
516 
517 	if (INTEL_GEN(gt->i915) >= 11)
518 		ret = gen11_reset_engines(gt, engine_mask, retry);
519 	else
520 		ret = gen6_reset_engines(gt, engine_mask, retry);
521 
522 skip_reset:
523 	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
524 		gen8_engine_reset_cancel(engine);
525 
526 	return ret;
527 }
528 
529 typedef int (*reset_func)(struct intel_gt *,
530 			  intel_engine_mask_t engine_mask,
531 			  unsigned int retry);
532 
533 static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
534 {
535 	if (INTEL_GEN(i915) >= 8)
536 		return gen8_reset_engines;
537 	else if (INTEL_GEN(i915) >= 6)
538 		return gen6_reset_engines;
539 	else if (INTEL_GEN(i915) >= 5)
540 		return ironlake_do_reset;
541 	else if (IS_G4X(i915))
542 		return g4x_do_reset;
543 	else if (IS_G33(i915) || IS_PINEVIEW(i915))
544 		return g33_do_reset;
545 	else if (INTEL_GEN(i915) >= 3)
546 		return i915_do_reset;
547 	else
548 		return NULL;
549 }
550 
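/*
 * Low-level reset entry point: pick the platform reset method and invoke
 * it with forcewake held and preemption disabled. Only a full-chip reset
 * (ALL_ENGINES) is retried, and only on -ETIMEDOUT, up to
 * RESET_MAX_RETRIES attempts.
 */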
551 int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
552 {
553 	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
554 	reset_func reset;
555 	int ret = -ETIMEDOUT;
556 	int retry;
557 
558 	reset = intel_get_gpu_reset(gt->i915);
559 	if (!reset)
560 		return -ENODEV;
561 
562 	/*
563 	 * If the power well sleeps during the reset, the reset
564 	 * request may be dropped and never complete (causing -EIO).
565 	 */
566 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
567 	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
568 		GEM_TRACE("engine_mask=%x\n", engine_mask);
569 		preempt_disable();
570 		ret = reset(gt, engine_mask, retry);
571 		preempt_enable();
572 	}
573 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
574 
575 	return ret;
576 }
577 
578 bool intel_has_gpu_reset(struct drm_i915_private *i915)
579 {
580 	if (!i915_modparams.reset)
581 		return false;
582 
583 	return intel_get_gpu_reset(i915);
584 }
585 
586 bool intel_has_reset_engine(struct drm_i915_private *i915)
587 {
588 	return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
589 }
590 
591 int intel_reset_guc(struct intel_gt *gt)
592 {
593 	u32 guc_domain =
594 		INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
595 	int ret;
596 
597 	GEM_BUG_ON(!HAS_GT_UC(gt->i915));
598 
599 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
600 	ret = gen6_hw_domain_reset(gt, guc_domain);
601 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
602 
603 	return ret;
604 }
605 
606 /*
607  * Ensure the irq handler finishes, and is not run again.
608  * The engine is also kept out of powersave until reset_finish_engine().
609  */
610 static void reset_prepare_engine(struct intel_engine_cs *engine)
611 {
612 	/*
613 	 * During the reset sequence, we must prevent the engine from
614 	 * entering RC6. As the context state is undefined until we restart
615 	 * the engine, if it does enter RC6 during the reset, the state
616 	 * written to the powercontext is undefined and so we may lose
617 	 * GPU state upon resume, i.e. fail to restart after a reset.
618 	 */
619 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
620 	engine->reset.prepare(engine);
621 }
622 
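/*
 * Invalidate userspace GGTT mmaps that are backed by a fence register: the
 * reset invalidates the fences, so users must take a fresh fault (after
 * i915_gem_restore_fences()) rather than keep accessing the aperture
 * through stale state.
 */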
623 static void revoke_mmaps(struct intel_gt *gt)
624 {
625 	int i;
626 
627 	for (i = 0; i < gt->ggtt->num_fences; i++) {
628 		struct drm_vma_offset_node *node;
629 		struct i915_vma *vma;
630 		u64 vma_offset;
631 
632 		vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
633 		if (!vma)
634 			continue;
635 
636 		if (!i915_vma_has_userfault(vma))
637 			continue;
638 
639 		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
640 		node = &vma->obj->base.vma_node;
641 		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
642 		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
643 				    drm_vma_node_offset_addr(node) + vma_offset,
644 				    vma->size,
645 				    1);
646 	}
647 }
648 
649 static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
650 {
651 	struct intel_engine_cs *engine;
652 	intel_engine_mask_t awake = 0;
653 	enum intel_engine_id id;
654 
655 	for_each_engine(engine, gt->i915, id) {
656 		if (intel_engine_pm_get_if_awake(engine))
657 			awake |= engine->mask;
658 		reset_prepare_engine(engine);
659 	}
660 
661 	intel_uc_reset_prepare(&gt->uc);
662 
663 	return awake;
664 }
665 
666 static void gt_revoke(struct intel_gt *gt)
667 {
668 	revoke_mmaps(gt);
669 }
670 
671 static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
672 {
673 	struct intel_engine_cs *engine;
674 	enum intel_engine_id id;
675 	int err;
676 
677 	/*
678 	 * Everything depends on having the GTT running, so we need to start
679 	 * there.
680 	 */
681 	err = i915_ggtt_enable_hw(gt->i915);
682 	if (err)
683 		return err;
684 
685 	for_each_engine(engine, gt->i915, id)
686 		__intel_engine_reset(engine, stalled_mask & engine->mask);
687 
688 	i915_gem_restore_fences(gt->i915);
689 
690 	return err;
691 }
692 
693 static void reset_finish_engine(struct intel_engine_cs *engine)
694 {
695 	engine->reset.finish(engine);
696 	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
697 
698 	intel_engine_signal_breadcrumbs(engine);
699 }
700 
701 static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
702 {
703 	struct intel_engine_cs *engine;
704 	enum intel_engine_id id;
705 
706 	for_each_engine(engine, gt->i915, id) {
707 		reset_finish_engine(engine);
708 		if (awake & engine->mask)
709 			intel_engine_pm_put(engine);
710 	}
711 }
712 
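/*
 * Installed as engine->submit_request when wedging the GPU: instead of
 * being submitted to the hardware, each request is immediately marked
 * complete with its fence error set to -EIO.
 */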
713 static void nop_submit_request(struct i915_request *request)
714 {
715 	struct intel_engine_cs *engine = request->engine;
716 	unsigned long flags;
717 
718 	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
719 		  engine->name, request->fence.context, request->fence.seqno);
720 	dma_fence_set_error(&request->fence, -EIO);
721 
722 	spin_lock_irqsave(&engine->active.lock, flags);
723 	__i915_request_submit(request);
724 	i915_request_mark_complete(request);
725 	spin_unlock_irqrestore(&engine->active.lock, flags);
726 
727 	intel_engine_queue_breadcrumbs(engine);
728 }
729 
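/*
 * Declare the GPU terminally wedged: stop the engines, switch submission
 * over to nop_submit_request and cancel everything in flight with -EIO.
 * Further execbufs are refused until (and unless) the error is cleared by
 * __intel_gt_unset_wedged().
 */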
730 static void __intel_gt_set_wedged(struct intel_gt *gt)
731 {
732 	struct intel_engine_cs *engine;
733 	intel_engine_mask_t awake;
734 	enum intel_engine_id id;
735 
736 	if (test_bit(I915_WEDGED, &gt->reset.flags))
737 		return;
738 
739 	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
740 		struct drm_printer p = drm_debug_printer(__func__);
741 
742 		for_each_engine(engine, gt->i915, id)
743 			intel_engine_dump(engine, &p, "%s\n", engine->name);
744 	}
745 
746 	GEM_TRACE("start\n");
747 
748 	/*
749 	 * First, stop submission to hw, but do not yet complete requests by
750 	 * rolling the global seqno forward (since this would complete requests
751 	 * for which we haven't set the fence error to EIO yet).
752 	 */
753 	awake = reset_prepare(gt);
754 
755 	/* Even if the GPU reset fails, it should still stop the engines */
756 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
757 		__intel_gt_reset(gt, ALL_ENGINES);
758 
759 	for_each_engine(engine, gt->i915, id)
760 		engine->submit_request = nop_submit_request;
761 
762 	/*
763 	 * Make sure no request can slip through without getting completed by
764 	 * either the nop_submit_request installed above or the
765 	 * engine->cancel_requests() below.
766 	 */
767 	synchronize_rcu_expedited();
768 	set_bit(I915_WEDGED, &gt->reset.flags);
769 
770 	/* Mark all executing requests as skipped */
771 	for_each_engine(engine, gt->i915, id)
772 		engine->cancel_requests(engine);
773 
774 	reset_finish(gt, awake);
775 
776 	GEM_TRACE("end\n");
777 }
778 
779 void intel_gt_set_wedged(struct intel_gt *gt)
780 {
781 	intel_wakeref_t wakeref;
782 
783 	mutex_lock(&gt->reset.mutex);
784 	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
785 		__intel_gt_set_wedged(gt);
786 	mutex_unlock(&gt->reset.mutex);
787 }
788 
789 static bool __intel_gt_unset_wedged(struct intel_gt *gt)
790 {
791 	struct intel_gt_timelines *timelines = &gt->timelines;
792 	struct intel_timeline *tl;
793 	unsigned long flags;
794 
795 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
796 		return true;
797 
798 	if (!gt->scratch) /* Never fully initialised, recovery impossible */
799 		return false;
800 
801 	GEM_TRACE("start\n");
802 
803 	/*
804 	 * Before unwedging, make sure that all pending operations
805 	 * are flushed and errored out - we may have requests waiting upon
806 	 * third party fences. We marked all inflight requests as EIO, and
807 	 * every execbuf since returned EIO, for consistency we want all
808 	 * every execbuf since has returned EIO; for consistency we want all
809 	 * is done inside our nop_submit_request - and so we must wait.
810 	 *
811 	 * No more can be submitted until we reset the wedged bit.
812 	 */
813 	spin_lock_irqsave(&timelines->lock, flags);
814 	list_for_each_entry(tl, &timelines->active_list, link) {
815 		struct i915_request *rq;
816 
817 		rq = i915_active_request_get_unlocked(&tl->last_request);
818 		if (!rq)
819 			continue;
820 
821 		spin_unlock_irqrestore(&timelines->lock, flags);
822 
823 		/*
824 		 * All internal dependencies (i915_requests) will have
825 		 * been flushed by the set-wedge, but we may be stuck waiting
826 		 * for external fences. These should all be capped to 10s
827 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
828 		 * in the worst case.
829 		 */
830 		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
831 		i915_request_put(rq);
832 
833 		/* Restart iteration after dropping the lock */
834 		spin_lock_irqsave(&timelines->lock, flags);
835 		tl = list_entry(&timelines->active_list, typeof(*tl), link);
836 	}
837 	spin_unlock_irqrestore(&timelines->lock, flags);
838 
839 	intel_gt_sanitize(gt, false);
840 
841 	/*
842 	 * Undo nop_submit_request. We prevent all new i915 requests from
843 	 * being queued (by disallowing execbuf whilst wedged) so having
844 	 * waited for all active requests above, we know the system is idle
845 	 * and do not have to worry about a thread being inside
846 	 * engine->submit_request() as we swap over. So unlike installing
847 	 * the nop_submit_request on reset, we can do this from normal
848 	 * context and do not require stop_machine().
849 	 */
850 	intel_engines_reset_default_submission(gt);
851 
852 	GEM_TRACE("end\n");
853 
854 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
855 	clear_bit(I915_WEDGED, &gt->reset.flags);
856 
857 	return true;
858 }
859 
860 bool intel_gt_unset_wedged(struct intel_gt *gt)
861 {
862 	bool result;
863 
864 	mutex_lock(&gt->reset.mutex);
865 	result = __intel_gt_unset_wedged(gt);
866 	mutex_unlock(&gt->reset.mutex);
867 
868 	return result;
869 }
870 
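/*
 * Perform the full-chip reset for intel_gt_reset(): revoke user mmaps
 * first, then keep retrying __intel_gt_reset() with an increasing backoff
 * (10ms, 20ms, ...) up to RESET_MAX_RETRIES before giving up.
 */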
871 static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
872 {
873 	int err, i;
874 
875 	gt_revoke(gt);
876 
877 	err = __intel_gt_reset(gt, ALL_ENGINES);
878 	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
879 		msleep(10 * (i + 1));
880 		err = __intel_gt_reset(gt, ALL_ENGINES);
881 	}
882 	if (err)
883 		return err;
884 
885 	return gt_reset(gt, stalled_mask);
886 }
887 
888 static int resume(struct intel_gt *gt)
889 {
890 	struct intel_engine_cs *engine;
891 	enum intel_engine_id id;
892 	int ret;
893 
894 	for_each_engine(engine, gt->i915, id) {
895 		ret = engine->resume(engine);
896 		if (ret)
897 			return ret;
898 	}
899 
900 	return 0;
901 }
902 
903 /**
904  * intel_gt_reset - reset chip after a hang
905  * @gt: #intel_gt to reset
906  * @stalled_mask: mask of the stalled engines with the guilty requests
907  * @reason: user error message for why we are resetting
908  *
909  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
910  * on failure.
911  *
912  * Procedure is fairly simple:
913  *   - reset the chip using the reset reg
914  *   - re-init context state
915  *   - re-init hardware status page
916  *   - re-init ring buffer
917  *   - re-init interrupt state
918  *   - re-init display
919  */
920 void intel_gt_reset(struct intel_gt *gt,
921 		    intel_engine_mask_t stalled_mask,
922 		    const char *reason)
923 {
924 	intel_engine_mask_t awake;
925 	int ret;
926 
927 	GEM_TRACE("flags=%lx\n", gt->reset.flags);
928 
929 	might_sleep();
930 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
931 	mutex_lock(&gt->reset.mutex);
932 
933 	/* Clear any previous failed attempts at recovery. Time to try again. */
934 	if (!__intel_gt_unset_wedged(gt))
935 		goto unlock;
936 
937 	if (reason)
938 		dev_notice(gt->i915->drm.dev,
939 			   "Resetting chip for %s\n", reason);
940 	atomic_inc(&gt->i915->gpu_error.reset_count);
941 
942 	awake = reset_prepare(gt);
943 
944 	if (!intel_has_gpu_reset(gt->i915)) {
945 		if (i915_modparams.reset)
946 			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
947 		else
948 			DRM_DEBUG_DRIVER("GPU reset disabled\n");
949 		goto error;
950 	}
951 
952 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
953 		intel_runtime_pm_disable_interrupts(gt->i915);
954 
955 	if (do_reset(gt, stalled_mask)) {
956 		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
957 		goto taint;
958 	}
959 
960 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
961 		intel_runtime_pm_enable_interrupts(gt->i915);
962 
963 	intel_overlay_reset(gt->i915);
964 
965 	/*
966 	 * Next we need to restore the context, but we don't use those
967 	 * yet either...
968 	 *
969 	 * Ring buffer needs to be re-initialized in the KMS case, or if X
970 	 * was running at the time of the reset (i.e. we weren't VT
971 	 * switched away).
972 	 */
973 	ret = i915_gem_init_hw(gt->i915);
974 	if (ret) {
975 		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
976 			  ret);
977 		goto taint;
978 	}
979 
980 	ret = resume(gt);
981 	if (ret)
982 		goto taint;
983 
984 	intel_gt_queue_hangcheck(gt);
985 
986 finish:
987 	reset_finish(gt, awake);
988 unlock:
989 	mutex_unlock(&gt->reset.mutex);
990 	return;
991 
992 taint:
993 	/*
994 	 * History tells us that if we cannot reset the GPU now, we
995 	 * never will. This then impacts everything that is run
996 	 * subsequently. On failing the reset, we mark the driver
997 	 * as wedged, preventing further execution on the GPU.
998 	 * We also want to go one step further and add a taint to the
999 	 * kernel so that any subsequent faults can be traced back to
1000 	 * this failure. This is important for CI, where if the
1001 	 * GPU/driver fails we would like to reboot and restart testing
1002 	 * rather than continue on into oblivion. For everyone else,
1003 	 * the system should still plod along, but they have been warned!
1004 	 */
1005 	add_taint_for_CI(TAINT_WARN);
1006 error:
1007 	__intel_gt_set_wedged(gt);
1008 	goto finish;
1009 }
1010 
1011 static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
1012 {
1013 	return __intel_gt_reset(engine->gt, engine->mask);
1014 }
1015 
1016 /**
1017  * intel_engine_reset - reset GPU engine to recover from a hang
1018  * @engine: engine to reset
1019  * @msg: reason for GPU reset; or NULL for no dev_notice()
1020  *
1021  * Reset a specific GPU engine. Useful if a hang is detected.
1022  * Returns zero on successful reset or otherwise an error code.
1023  *
1024  * Procedure is:
1025  *  - identify the request that caused the hang and drop it
1026  *  - reset engine (which will force the engine to idle)
1027  *  - re-init/configure engine
1028  */
1029 int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
1030 {
1031 	struct intel_gt *gt = engine->gt;
1032 	int ret;
1033 
1034 	GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
1035 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
1036 
1037 	if (!intel_engine_pm_get_if_awake(engine))
1038 		return 0;
1039 
1040 	reset_prepare_engine(engine);
1041 
1042 	if (msg)
1043 		dev_notice(engine->i915->drm.dev,
1044 			   "Resetting %s for %s\n", engine->name, msg);
1045 	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
1046 
1047 	if (!engine->gt->uc.guc.execbuf_client)
1048 		ret = intel_gt_reset_engine(engine);
1049 	else
1050 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
1051 	if (ret) {
1052 		/* If we fail here, we expect to fallback to a global reset */
1053 		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
1054 				 engine->gt->uc.guc.execbuf_client ? "GuC " : "",
1055 				 engine->name, ret);
1056 		goto out;
1057 	}
1058 
1059 	/*
1060 	 * The request that caused the hang is stuck on the elsp; we know the
1061 	 * active request and can drop it, then adjust the ring head to skip
1062 	 * the offending request and resume executing the rest of the queue.
1063 	 */
1064 	__intel_engine_reset(engine, true);
1065 
1066 	/*
1067 	 * The engine and its registers (and workarounds in case of render)
1068 	 * have been reset to their default values. Follow the init_ring
1069 	 * process to program RING_MODE, HWSP and re-enable submission.
1070 	 */
1071 	ret = engine->resume(engine);
1072 
1073 out:
1074 	intel_engine_cancel_stop_cs(engine);
1075 	reset_finish_engine(engine);
1076 	intel_engine_pm_put(engine);
1077 	return ret;
1078 }
1079 
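/*
 * Wrapper around intel_gt_reset() for the uevent-reporting path: notify
 * userspace, quiesce display via intel_prepare_reset(), flush all
 * intel_gt_reset_trylock() readers through the backoff SRCU, and use a
 * 5s watchdog that wedges the GT if the reset does not complete in time.
 */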
1080 static void intel_gt_reset_global(struct intel_gt *gt,
1081 				  u32 engine_mask,
1082 				  const char *reason)
1083 {
1084 	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
1085 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
1086 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
1087 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
1088 	struct intel_wedge_me w;
1089 
1090 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
1091 
1092 	DRM_DEBUG_DRIVER("resetting chip\n");
1093 	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
1094 
1095 	/* Use a watchdog to ensure that our reset completes */
1096 	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
1097 		intel_prepare_reset(gt->i915);
1098 
1099 		/* Flush everyone using a resource about to be clobbered */
1100 		synchronize_srcu_expedited(&gt->reset.backoff_srcu);
1101 
1102 		intel_gt_reset(gt, engine_mask, reason);
1103 
1104 		intel_finish_reset(gt->i915);
1105 	}
1106 
1107 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
1108 		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
1109 }
1110 
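/*
 * Illustrative caller (the exact call site lives outside this file):
 * hangcheck would report a hung engine with something like
 *
 *	intel_gt_handle_error(gt, engine->mask, I915_ERROR_CAPTURE,
 *			      "hang on %s", engine->name);
 *
 * which captures the error state, tries a per-engine reset first and only
 * escalates to the global reset path if that fails or is unavailable.
 */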
1111 /**
1112  * intel_gt_handle_error - handle a gpu error
1113  * @gt: the intel_gt
1114  * @engine_mask: mask representing engines that are hung
1115  * @flags: control flags
1116  * @fmt: Error message format string
1117  *
1118  * Do some basic checking of register state at error time and
1119  * dump it to the syslog.  Also call i915_capture_error_state() to make
1120  * sure we get a record and make it available in debugfs.  Fire a uevent
1121  * so userspace knows something bad happened (should trigger collection
1122  * of a ring dump etc.).
1123  */
1124 void intel_gt_handle_error(struct intel_gt *gt,
1125 			   intel_engine_mask_t engine_mask,
1126 			   unsigned long flags,
1127 			   const char *fmt, ...)
1128 {
1129 	struct intel_engine_cs *engine;
1130 	intel_wakeref_t wakeref;
1131 	intel_engine_mask_t tmp;
1132 	char error_msg[80];
1133 	char *msg = NULL;
1134 
1135 	if (fmt) {
1136 		va_list args;
1137 
1138 		va_start(args, fmt);
1139 		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
1140 		va_end(args);
1141 
1142 		msg = error_msg;
1143 	}
1144 
1145 	/*
1146 	 * In most cases it's guaranteed that we get here with an RPM
1147 	 * reference held, for example because there is a pending GPU
1148 	 * request that won't finish until the reset is done. This
1149 	 * isn't the case at least when we get here by doing a
1150 	 * simulated reset via debugfs, so get an RPM reference.
1151 	 */
1152 	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
1153 
1154 	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
1155 
1156 	if (flags & I915_ERROR_CAPTURE) {
1157 		i915_capture_error_state(gt->i915, engine_mask, msg);
1158 		intel_gt_clear_error_registers(gt, engine_mask);
1159 	}
1160 
1161 	/*
1162 	 * Try engine reset when available. We fall back to full reset if
1163 	 * single reset fails.
1164 	 */
1165 	if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
1166 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
1167 			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
1168 			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1169 					     &gt->reset.flags))
1170 				continue;
1171 
1172 			if (intel_engine_reset(engine, msg) == 0)
1173 				engine_mask &= ~engine->mask;
1174 
1175 			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
1176 					      &gt->reset.flags);
1177 		}
1178 	}
1179 
1180 	if (!engine_mask)
1181 		goto out;
1182 
1183 	/* Full reset needs the mutex, stop any other user trying to do so. */
1184 	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
1185 		wait_event(gt->reset.queue,
1186 			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
1187 		goto out; /* piggy-back on the other reset */
1188 	}
1189 
1190 	/* Make sure intel_gt_reset_trylock() sees the I915_RESET_BACKOFF */
1191 	synchronize_rcu_expedited();
1192 
1193 	/* Prevent any other reset-engine attempt. */
1194 	for_each_engine(engine, gt->i915, tmp) {
1195 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1196 					&gt->reset.flags))
1197 			wait_on_bit(&gt->reset.flags,
1198 				    I915_RESET_ENGINE + engine->id,
1199 				    TASK_UNINTERRUPTIBLE);
1200 	}
1201 
1202 	intel_gt_reset_global(gt, engine_mask, msg);
1203 
1204 	for_each_engine(engine, gt->i915, tmp)
1205 		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
1206 				 &gt->reset.flags);
1207 	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
1208 	smp_mb__after_atomic();
1209 	wake_up_all(&gt->reset.queue);
1210 
1211 out:
1212 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
1213 }
1214 
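/*
 * Reset "locking" for code about to touch the hardware outside of the
 * reset path. A sketch of typical use (callers live outside this file):
 *
 *	int srcu;
 *
 *	err = intel_gt_reset_trylock(gt, &srcu);
 *	if (err)
 *		return err;
 *	... access hardware ...
 *	intel_gt_reset_unlock(gt, srcu);
 *
 * A pending global reset (I915_RESET_BACKOFF) makes trylock wait, and the
 * reset itself waits for existing readers via the backoff SRCU.
 */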
1215 int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
1216 {
1217 	might_lock(&gt->reset.backoff_srcu);
1218 	might_sleep();
1219 
1220 	rcu_read_lock();
1221 	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
1222 		rcu_read_unlock();
1223 
1224 		if (wait_event_interruptible(gt->reset.queue,
1225 					     !test_bit(I915_RESET_BACKOFF,
1226 						       &gt->reset.flags)))
1227 			return -EINTR;
1228 
1229 		rcu_read_lock();
1230 	}
1231 	*srcu = srcu_read_lock(&gt->reset.backoff_srcu);
1232 	rcu_read_unlock();
1233 
1234 	return 0;
1235 }
1236 
1237 void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
1238 __releases(&gt->reset.backoff_srcu)
1239 {
1240 	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
1241 }
1242 
1243 int intel_gt_terminally_wedged(struct intel_gt *gt)
1244 {
1245 	might_sleep();
1246 
1247 	if (!intel_gt_is_wedged(gt))
1248 		return 0;
1249 
1250 	/* Reset still in progress? Maybe we will recover? */
1251 	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
1252 		return -EIO;
1253 
1254 	/* XXX intel_reset_finish() still takes struct_mutex!!! */
1255 	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
1256 		return -EAGAIN;
1257 
1258 	if (wait_event_interruptible(gt->reset.queue,
1259 				     !test_bit(I915_RESET_BACKOFF,
1260 					       &gt->reset.flags)))
1261 		return -EINTR;
1262 
1263 	return intel_gt_is_wedged(gt) ? -EIO : 0;
1264 }
1265 
1266 void intel_gt_init_reset(struct intel_gt *gt)
1267 {
1268 	init_waitqueue_head(&gt->reset.queue);
1269 	mutex_init(&gt->reset.mutex);
1270 	init_srcu_struct(&gt->reset.backoff_srcu);
1271 }
1272 
1273 void intel_gt_fini_reset(struct intel_gt *gt)
1274 {
1275 	cleanup_srcu_struct(&gt->reset.backoff_srcu);
1276 }
1277 
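/*
 * Watchdog used via intel_wedge_on_timeout(): __intel_init_wedge() arms a
 * delayed work that wedges the GT if the guarded section has not called
 * __intel_fini_wedge() before the timeout expires.
 */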
1278 static void intel_wedge_me(struct work_struct *work)
1279 {
1280 	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
1281 
1282 	dev_err(w->gt->i915->drm.dev,
1283 		"%s timed out, cancelling all in-flight rendering.\n",
1284 		w->name);
1285 	intel_gt_set_wedged(w->gt);
1286 }
1287 
1288 void __intel_init_wedge(struct intel_wedge_me *w,
1289 			struct intel_gt *gt,
1290 			long timeout,
1291 			const char *name)
1292 {
1293 	w->gt = gt;
1294 	w->name = name;
1295 
1296 	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
1297 	schedule_delayed_work(&w->work, timeout);
1298 }
1299 
1300 void __intel_fini_wedge(struct intel_wedge_me *w)
1301 {
1302 	cancel_delayed_work_sync(&w->work);
1303 	destroy_delayed_work_on_stack(&w->work);
1304 	w->gt = NULL;
1305 }
1306 
1307 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1308 #include "selftest_reset.c"
1309 #endif
1310