xref: /openbmc/linux/drivers/gpu/drm/i915/gt/intel_reset.c (revision 91db9311)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2008-2018 Intel Corporation
5  */
6 
7 #include <linux/sched/mm.h>
8 #include <linux/stop_machine.h>
9 
10 #include "display/intel_overlay.h"
11 
12 #include "gem/i915_gem_context.h"
13 
14 #include "i915_drv.h"
15 #include "i915_gpu_error.h"
16 #include "i915_irq.h"
17 #include "intel_engine_pm.h"
18 #include "intel_gt.h"
19 #include "intel_gt_pm.h"
20 #include "intel_reset.h"
21 
22 #include "uc/intel_guc.h"
23 
24 #define RESET_MAX_RETRIES 3
25 
26 /* XXX How to handle concurrent GGTT updates using tiling registers? */
27 #define RESET_UNDER_STOP_MACHINE 0
28 
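/*
 * Thin wrappers around the "_fw" (forcewake-bypassing) uncore RMW accessor;
 * the caller is expected to already hold whatever forcewake the register
 * being modified requires.
 */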
29 static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
30 {
31 	intel_uncore_rmw_fw(uncore, reg, 0, set);
32 }
33 
34 static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
35 {
36 	intel_uncore_rmw_fw(uncore, reg, clr, 0);
37 }
38 
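/*
 * Cancel (complete with -EIO) every request from the hung context that is
 * queued on this engine behind the guilty request, so that only the
 * offending context pays for the hang.
 */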
39 static void engine_skip_context(struct i915_request *rq)
40 {
41 	struct intel_engine_cs *engine = rq->engine;
42 	struct i915_gem_context *hung_ctx = rq->gem_context;
43 
44 	lockdep_assert_held(&engine->active.lock);
45 
46 	if (!i915_request_is_active(rq))
47 		return;
48 
49 	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
50 		if (rq->gem_context == hung_ctx)
51 			i915_request_skip(rq, -EIO);
52 }
53 
54 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
55 			       const struct i915_gem_context *ctx)
56 {
57 	unsigned int score;
58 	unsigned long prev_hang;
59 
60 	if (i915_gem_context_is_banned(ctx))
61 		score = I915_CLIENT_SCORE_CONTEXT_BAN;
62 	else
63 		score = 0;
64 
65 	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
66 	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
67 		score += I915_CLIENT_SCORE_HANG_FAST;
68 
69 	if (score) {
70 		atomic_add(score, &file_priv->ban_score);
71 
72 		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
73 				 ctx->name, score,
74 				 atomic_read(&file_priv->ban_score));
75 	}
76 }
77 
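/*
 * Record another hang against the context and decide whether it should now
 * be banned: non-recoverable contexts are banned on their first hang, and
 * bannable contexts are banned after hanging repeatedly in quick succession.
 */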
78 static bool context_mark_guilty(struct i915_gem_context *ctx)
79 {
80 	unsigned long prev_hang;
81 	bool banned;
82 	int i;
83 
84 	atomic_inc(&ctx->guilty_count);
85 
86 	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
87 	if (!i915_gem_context_is_bannable(ctx))
88 		return false;
89 
90 	/* Record the timestamp for the last N hangs */
91 	prev_hang = ctx->hang_timestamp[0];
92 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
93 		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
94 	ctx->hang_timestamp[i] = jiffies;
95 
96 	/* If we have hung N+1 times in rapid succession, we ban the context! */
97 	banned = !i915_gem_context_is_recoverable(ctx);
98 	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
99 		banned = true;
100 	if (banned) {
101 		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
102 				 ctx->name, atomic_read(&ctx->guilty_count));
103 		i915_gem_context_set_banned(ctx);
104 	}
105 
106 	if (!IS_ERR_OR_NULL(ctx->file_priv))
107 		client_mark_guilty(ctx->file_priv, ctx);
108 
109 	return banned;
110 }
111 
112 static void context_mark_innocent(struct i915_gem_context *ctx)
113 {
114 	atomic_inc(&ctx->active_count);
115 }
116 
117 void __i915_request_reset(struct i915_request *rq, bool guilty)
118 {
119 	GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
120 		  rq->engine->name,
121 		  rq->fence.context,
122 		  rq->fence.seqno,
123 		  yesno(guilty));
124 
125 	lockdep_assert_held(&rq->engine->active.lock);
126 	GEM_BUG_ON(i915_request_completed(rq));
127 
128 	if (guilty) {
129 		i915_request_skip(rq, -EIO);
130 		if (context_mark_guilty(rq->gem_context))
131 			engine_skip_context(rq);
132 	} else {
133 		dma_fence_set_error(&rq->fence, -EAGAIN);
134 		context_mark_innocent(rq->gem_context);
135 	}
136 }
137 
138 static bool i915_in_reset(struct pci_dev *pdev)
139 {
140 	u8 gdrst;
141 
142 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
143 	return gdrst & GRDOM_RESET_STATUS;
144 }
145 
146 static int i915_do_reset(struct intel_gt *gt,
147 			 intel_engine_mask_t engine_mask,
148 			 unsigned int retry)
149 {
150 	struct pci_dev *pdev = gt->i915->drm.pdev;
151 	int err;
152 
153 	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
154 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
155 	udelay(50);
156 	err = wait_for_atomic(i915_in_reset(pdev), 50);
157 
158 	/* Clear the reset request. */
159 	pci_write_config_byte(pdev, I915_GDRST, 0);
160 	udelay(50);
161 	if (!err)
162 		err = wait_for_atomic(!i915_in_reset(pdev), 50);
163 
164 	return err;
165 }
166 
167 static bool g4x_reset_complete(struct pci_dev *pdev)
168 {
169 	u8 gdrst;
170 
171 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
172 	return (gdrst & GRDOM_RESET_ENABLE) == 0;
173 }
174 
175 static int g33_do_reset(struct intel_gt *gt,
176 			intel_engine_mask_t engine_mask,
177 			unsigned int retry)
178 {
179 	struct pci_dev *pdev = gt->i915->drm.pdev;
180 
181 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
182 	return wait_for_atomic(g4x_reset_complete(pdev), 50);
183 }
184 
185 static int g4x_do_reset(struct intel_gt *gt,
186 			intel_engine_mask_t engine_mask,
187 			unsigned int retry)
188 {
189 	struct pci_dev *pdev = gt->i915->drm.pdev;
190 	struct intel_uncore *uncore = gt->uncore;
191 	int ret;
192 
193 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
194 	rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
195 	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
196 
197 	pci_write_config_byte(pdev, I915_GDRST,
198 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
199 	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
200 	if (ret) {
201 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
202 		goto out;
203 	}
204 
205 	pci_write_config_byte(pdev, I915_GDRST,
206 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
207 	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
208 	if (ret) {
209 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
210 		goto out;
211 	}
212 
213 out:
214 	pci_write_config_byte(pdev, I915_GDRST, 0);
215 
216 	rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
217 	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);
218 
219 	return ret;
220 }
221 
222 static int ironlake_do_reset(struct intel_gt *gt,
223 			     intel_engine_mask_t engine_mask,
224 			     unsigned int retry)
225 {
226 	struct intel_uncore *uncore = gt->uncore;
227 	int ret;
228 
229 	intel_uncore_write_fw(uncore, ILK_GDSR,
230 			      ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
231 	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
232 					   ILK_GRDOM_RESET_ENABLE, 0,
233 					   5000, 0,
234 					   NULL);
235 	if (ret) {
236 		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
237 		goto out;
238 	}
239 
240 	intel_uncore_write_fw(uncore, ILK_GDSR,
241 			      ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
242 	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
243 					   ILK_GRDOM_RESET_ENABLE, 0,
244 					   5000, 0,
245 					   NULL);
246 	if (ret) {
247 		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
248 		goto out;
249 	}
250 
251 out:
252 	intel_uncore_write_fw(uncore, ILK_GDSR, 0);
253 	intel_uncore_posting_read_fw(uncore, ILK_GDSR);
254 	return ret;
255 }
256 
257 /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
258 static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
259 {
260 	struct intel_uncore *uncore = gt->uncore;
261 	int err;
262 
263 	/*
264 	 * GEN6_GDRST is not in the gt power well, so there is no need to
265 	 * check for fifo space for the write or to forcewake the chip for
266 	 * the read.
267 	 */
268 	intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
269 
270 	/* Wait for the device to ack the reset requests */
271 	err = __intel_wait_for_register_fw(uncore,
272 					   GEN6_GDRST, hw_domain_mask, 0,
273 					   500, 0,
274 					   NULL);
275 	if (err)
276 		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
277 				 hw_domain_mask);
278 
279 	return err;
280 }
281 
282 static int gen6_reset_engines(struct intel_gt *gt,
283 			      intel_engine_mask_t engine_mask,
284 			      unsigned int retry)
285 {
286 	struct intel_engine_cs *engine;
287 	const u32 hw_engine_mask[] = {
288 		[RCS0]  = GEN6_GRDOM_RENDER,
289 		[BCS0]  = GEN6_GRDOM_BLT,
290 		[VCS0]  = GEN6_GRDOM_MEDIA,
291 		[VCS1]  = GEN8_GRDOM_MEDIA2,
292 		[VECS0] = GEN6_GRDOM_VECS,
293 	};
294 	u32 hw_mask;
295 
296 	if (engine_mask == ALL_ENGINES) {
297 		hw_mask = GEN6_GRDOM_FULL;
298 	} else {
299 		intel_engine_mask_t tmp;
300 
301 		hw_mask = 0;
302 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
303 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
304 			hw_mask |= hw_engine_mask[engine->id];
305 		}
306 	}
307 
308 	return gen6_hw_domain_reset(gt, hw_mask);
309 }
310 
311 static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
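/*
 * On gen11 the SFC units are shared between the media engines. Before
 * resetting an engine that may be using an SFC we force-lock it, and if the
 * SFC is actually in use we return its reset bit so the caller folds it into
 * the reset domain mask.
 */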
312 {
313 	struct intel_uncore *uncore = engine->uncore;
314 	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
315 	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
316 	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
317 	i915_reg_t sfc_usage;
318 	u32 sfc_usage_bit;
319 	u32 sfc_reset_bit;
320 
321 	switch (engine->class) {
322 	case VIDEO_DECODE_CLASS:
323 		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
324 			return 0;
325 
326 		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
327 		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
328 
329 		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
330 		sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;
331 
332 		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
333 		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
334 		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
335 		break;
336 
337 	case VIDEO_ENHANCEMENT_CLASS:
338 		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
339 		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
340 
341 		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
342 		sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;
343 
344 		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
345 		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
346 		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
347 		break;
348 
349 	default:
350 		return 0;
351 	}
352 
353 	/*
354 	 * Tell the engine that a software reset is going to happen. The engine
355 	 * will then try to force lock the SFC (if currently locked, it will
356 	 * remain so until we tell the engine it is safe to unlock; if currently
357 	 * unlocked, it will ignore this and all new lock requests). If SFC
358 	 * ends up being locked to the engine we want to reset, we have to reset
359 	 * it as well (we will unlock it once the reset sequence is completed).
360 	 */
361 	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
362 
363 	if (__intel_wait_for_register_fw(uncore,
364 					 sfc_forced_lock_ack,
365 					 sfc_forced_lock_ack_bit,
366 					 sfc_forced_lock_ack_bit,
367 					 1000, 0, NULL)) {
368 		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
369 		return 0;
370 	}
371 
372 	if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
373 		return sfc_reset_bit;
374 
375 	return 0;
376 }
377 
378 static void gen11_unlock_sfc(struct intel_engine_cs *engine)
379 {
380 	struct intel_uncore *uncore = engine->uncore;
381 	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
382 	i915_reg_t sfc_forced_lock;
383 	u32 sfc_forced_lock_bit;
384 
385 	switch (engine->class) {
386 	case VIDEO_DECODE_CLASS:
387 		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
388 			return;
389 
390 		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
391 		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
392 		break;
393 
394 	case VIDEO_ENHANCEMENT_CLASS:
395 		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
396 		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
397 		break;
398 
399 	default:
400 		return;
401 	}
402 
403 	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
404 }
405 
406 static int gen11_reset_engines(struct intel_gt *gt,
407 			       intel_engine_mask_t engine_mask,
408 			       unsigned int retry)
409 {
410 	const u32 hw_engine_mask[] = {
411 		[RCS0]  = GEN11_GRDOM_RENDER,
412 		[BCS0]  = GEN11_GRDOM_BLT,
413 		[VCS0]  = GEN11_GRDOM_MEDIA,
414 		[VCS1]  = GEN11_GRDOM_MEDIA2,
415 		[VCS2]  = GEN11_GRDOM_MEDIA3,
416 		[VCS3]  = GEN11_GRDOM_MEDIA4,
417 		[VECS0] = GEN11_GRDOM_VECS,
418 		[VECS1] = GEN11_GRDOM_VECS2,
419 	};
420 	struct intel_engine_cs *engine;
421 	intel_engine_mask_t tmp;
422 	u32 hw_mask;
423 	int ret;
424 
425 	if (engine_mask == ALL_ENGINES) {
426 		hw_mask = GEN11_GRDOM_FULL;
427 	} else {
428 		hw_mask = 0;
429 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
430 			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
431 			hw_mask |= hw_engine_mask[engine->id];
432 			hw_mask |= gen11_lock_sfc(engine);
433 		}
434 	}
435 
436 	ret = gen6_hw_domain_reset(gt, hw_mask);
437 
438 	if (engine_mask != ALL_ENGINES)
439 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
440 			gen11_unlock_sfc(engine);
441 
442 	return ret;
443 }
444 
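/*
 * Ask the engine to quiesce and wait for its ready-for-reset acknowledgement
 * via RING_RESET_CTL. For catastrophic errors the handshake is bypassed and
 * we instead wait for the hardware to clear the error itself.
 */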
445 static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
446 {
447 	struct intel_uncore *uncore = engine->uncore;
448 	const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base);
449 	u32 request, mask, ack;
450 	int ret;
451 
452 	ack = intel_uncore_read_fw(uncore, reg);
453 	if (ack & RESET_CTL_CAT_ERROR) {
454 		/*
455 		 * For catastrophic errors, ready-for-reset sequence
456 		 * needs to be bypassed: HAS#396813
457 		 */
458 		request = RESET_CTL_CAT_ERROR;
459 		mask = RESET_CTL_CAT_ERROR;
460 
461 		/* Catastrophic errors need to be cleared by HW */
462 		ack = 0;
463 	} else if (!(ack & RESET_CTL_READY_TO_RESET)) {
464 		request = RESET_CTL_REQUEST_RESET;
465 		mask = RESET_CTL_READY_TO_RESET;
466 		ack = RESET_CTL_READY_TO_RESET;
467 	} else {
468 		return 0;
469 	}
470 
471 	intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request));
472 	ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
473 					   700, 0, NULL);
474 	if (ret)
475 		DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
476 			  engine->name, request,
477 			  intel_uncore_read_fw(uncore, reg));
478 
479 	return ret;
480 }
481 
482 static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
483 {
484 	intel_uncore_write_fw(engine->uncore,
485 			      RING_RESET_CTL(engine->mmio_base),
486 			      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
487 }
488 
489 static int gen8_reset_engines(struct intel_gt *gt,
490 			      intel_engine_mask_t engine_mask,
491 			      unsigned int retry)
492 {
493 	struct intel_engine_cs *engine;
494 	const bool reset_non_ready = retry >= 1;
495 	intel_engine_mask_t tmp;
496 	int ret;
497 
498 	for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
499 		ret = gen8_engine_reset_prepare(engine);
500 		if (ret && !reset_non_ready)
501 			goto skip_reset;
502 
503 		/*
504 		 * If this is not the first failed attempt to prepare,
505 		 * we proceed with the reset anyway.
506 		 *
507 		 * By doing so we risk context corruption and, on
508 		 * some gens (kbl), a possible system hang if the reset
509 		 * happens during active batch buffer execution.
510 		 *
511 		 * We would rather take context corruption than a
512 		 * failed reset with a wedged driver/GPU. The active
513 		 * batch buffer case should be covered by the
514 		 * stop_engines() we perform before the reset.
515 		 */
516 	}
517 
518 	if (INTEL_GEN(gt->i915) >= 11)
519 		ret = gen11_reset_engines(gt, engine_mask, retry);
520 	else
521 		ret = gen6_reset_engines(gt, engine_mask, retry);
522 
523 skip_reset:
524 	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
525 		gen8_engine_reset_cancel(engine);
526 
527 	return ret;
528 }
529 
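/*
 * Per-platform reset vfunc: reset the engines selected by engine_mask (or
 * the whole device for ALL_ENGINES). @retry is the attempt number, allowing
 * later attempts to be more forceful.
 */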
530 typedef int (*reset_func)(struct intel_gt *,
531 			  intel_engine_mask_t engine_mask,
532 			  unsigned int retry);
533 
534 static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
535 {
536 	if (INTEL_GEN(i915) >= 8)
537 		return gen8_reset_engines;
538 	else if (INTEL_GEN(i915) >= 6)
539 		return gen6_reset_engines;
540 	else if (INTEL_GEN(i915) >= 5)
541 		return ironlake_do_reset;
542 	else if (IS_G4X(i915))
543 		return g4x_do_reset;
544 	else if (IS_G33(i915) || IS_PINEVIEW(i915))
545 		return g33_do_reset;
546 	else if (INTEL_GEN(i915) >= 3)
547 		return i915_do_reset;
548 	else
549 		return NULL;
550 }
551 
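/*
 * Run the platform-specific reset with forcewake held so the power well
 * cannot sleep mid-reset, retrying a full-chip reset up to RESET_MAX_RETRIES
 * times; each attempt runs with preemption disabled.
 */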
552 int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
553 {
554 	const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
555 	reset_func reset;
556 	int ret = -ETIMEDOUT;
557 	int retry;
558 
559 	reset = intel_get_gpu_reset(gt->i915);
560 	if (!reset)
561 		return -ENODEV;
562 
563 	/*
564 	 * If the power well sleeps during the reset, the reset
565 	 * request may be dropped and never completes (causing -EIO).
566 	 */
567 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
568 	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
569 		GEM_TRACE("engine_mask=%x\n", engine_mask);
570 		preempt_disable();
571 		ret = reset(gt, engine_mask, retry);
572 		preempt_enable();
573 	}
574 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
575 
576 	return ret;
577 }
578 
579 bool intel_has_gpu_reset(struct drm_i915_private *i915)
580 {
581 	if (!i915_modparams.reset)
582 		return false;
583 
584 	return intel_get_gpu_reset(i915);
585 }
586 
587 bool intel_has_reset_engine(struct drm_i915_private *i915)
588 {
589 	return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
590 }
591 
592 int intel_reset_guc(struct intel_gt *gt)
593 {
594 	u32 guc_domain =
595 		INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
596 	int ret;
597 
598 	GEM_BUG_ON(!HAS_GT_UC(gt->i915));
599 
600 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
601 	ret = gen6_hw_domain_reset(gt, guc_domain);
602 	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
603 
604 	return ret;
605 }
606 
607 /*
608  * Ensure the irq handler finishes, and is not run again, so that the
609  * engine state cannot change underneath us while we reset it.
610  */
611 static void reset_prepare_engine(struct intel_engine_cs *engine)
612 {
613 	/*
614 	 * During the reset sequence, we must prevent the engine from
615 	 * entering RC6. As the context state is undefined until we restart
616 	 * the engine, if it does enter RC6 during the reset, the state
617 	 * written to the powercontext is undefined and so we may lose
618 	 * GPU state upon resume, i.e. fail to restart after a reset.
619 	 */
620 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
621 	engine->reset.prepare(engine);
622 }
623 
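/*
 * Zap every userspace CPU mmap that goes through a fenced GGTT region so
 * that userspace refaults after the reset and picks up the restored fence
 * (tiling) state.
 */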
624 static void revoke_mmaps(struct intel_gt *gt)
625 {
626 	int i;
627 
628 	for (i = 0; i < gt->ggtt->num_fences; i++) {
629 		struct drm_vma_offset_node *node;
630 		struct i915_vma *vma;
631 		u64 vma_offset;
632 
633 		vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
634 		if (!vma)
635 			continue;
636 
637 		if (!i915_vma_has_userfault(vma))
638 			continue;
639 
640 		GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
641 		node = &vma->obj->base.vma_node;
642 		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
643 		unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
644 				    drm_vma_node_offset_addr(node) + vma_offset,
645 				    vma->size,
646 				    1);
647 	}
648 }
649 
650 static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
651 {
652 	struct intel_engine_cs *engine;
653 	intel_engine_mask_t awake = 0;
654 	enum intel_engine_id id;
655 
656 	for_each_engine(engine, gt->i915, id) {
657 		if (intel_engine_pm_get_if_awake(engine))
658 			awake |= engine->mask;
659 		reset_prepare_engine(engine);
660 	}
661 
662 	intel_uc_reset_prepare(&gt->uc);
663 
664 	return awake;
665 }
666 
667 static void gt_revoke(struct intel_gt *gt)
668 {
669 	revoke_mmaps(gt);
670 }
671 
672 static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
673 {
674 	struct intel_engine_cs *engine;
675 	enum intel_engine_id id;
676 	int err;
677 
678 	/*
679 	 * Everything depends on having the GTT running, so we need to start
680 	 * there.
681 	 */
682 	err = i915_ggtt_enable_hw(gt->i915);
683 	if (err)
684 		return err;
685 
686 	for_each_engine(engine, gt->i915, id)
687 		__intel_engine_reset(engine, stalled_mask & engine->mask);
688 
689 	i915_gem_restore_fences(gt->i915);
690 
691 	return err;
692 }
693 
694 static void reset_finish_engine(struct intel_engine_cs *engine)
695 {
696 	engine->reset.finish(engine);
697 	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
698 
699 	intel_engine_signal_breadcrumbs(engine);
700 }
701 
702 static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
703 {
704 	struct intel_engine_cs *engine;
705 	enum intel_engine_id id;
706 
707 	for_each_engine(engine, gt->i915, id) {
708 		reset_finish_engine(engine);
709 		if (awake & engine->mask)
710 			intel_engine_pm_put(engine);
711 	}
712 }
713 
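/*
 * Substitute submission path used once the device is wedged: requests are
 * immediately completed with -EIO and never touch the hardware.
 */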
714 static void nop_submit_request(struct i915_request *request)
715 {
716 	struct intel_engine_cs *engine = request->engine;
717 	unsigned long flags;
718 
719 	GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
720 		  engine->name, request->fence.context, request->fence.seqno);
721 	dma_fence_set_error(&request->fence, -EIO);
722 
723 	spin_lock_irqsave(&engine->active.lock, flags);
724 	__i915_request_submit(request);
725 	i915_request_mark_complete(request);
726 	spin_unlock_irqrestore(&engine->active.lock, flags);
727 
728 	intel_engine_queue_breadcrumbs(engine);
729 }
730 
731 static void __intel_gt_set_wedged(struct intel_gt *gt)
732 {
733 	struct intel_engine_cs *engine;
734 	intel_engine_mask_t awake;
735 	enum intel_engine_id id;
736 
737 	if (test_bit(I915_WEDGED, &gt->reset.flags))
738 		return;
739 
740 	if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
741 		struct drm_printer p = drm_debug_printer(__func__);
742 
743 		for_each_engine(engine, gt->i915, id)
744 			intel_engine_dump(engine, &p, "%s\n", engine->name);
745 	}
746 
747 	GEM_TRACE("start\n");
748 
749 	/*
750 	 * First, stop submission to hw, but do not yet complete requests by
751 	 * rolling the global seqno forward (since this would complete requests
752 	 * for which we haven't set the fence error to EIO yet).
753 	 */
754 	awake = reset_prepare(gt);
755 
756 	/* Even if the GPU reset fails, it should still stop the engines */
757 	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
758 		__intel_gt_reset(gt, ALL_ENGINES);
759 
760 	for_each_engine(engine, gt->i915, id) {
761 		engine->submit_request = nop_submit_request;
762 		engine->schedule = NULL;
763 	}
764 	gt->i915->caps.scheduler = 0;
765 
766 	/*
767 	 * Make sure no request can slip through without getting completed by
768 	 * either the nop_submit_request we have just installed or the
769 	 * cancel_requests() below that marks all executing requests as skipped.
770 	 */
771 	synchronize_rcu_expedited();
772 	set_bit(I915_WEDGED, &gt->reset.flags);
773 
774 	/* Mark all executing requests as skipped */
775 	for_each_engine(engine, gt->i915, id)
776 		engine->cancel_requests(engine);
777 
778 	reset_finish(gt, awake);
779 
780 	GEM_TRACE("end\n");
781 }
782 
783 void intel_gt_set_wedged(struct intel_gt *gt)
784 {
785 	intel_wakeref_t wakeref;
786 
787 	mutex_lock(&gt->reset.mutex);
788 	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
789 		__intel_gt_set_wedged(gt);
790 	mutex_unlock(&gt->reset.mutex);
791 }
792 
793 static bool __intel_gt_unset_wedged(struct intel_gt *gt)
794 {
795 	struct intel_gt_timelines *timelines = &gt->timelines;
796 	struct intel_timeline *tl;
797 
798 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
799 		return true;
800 
801 	if (!gt->scratch) /* Never fully initialised, recovery impossible */
802 		return false;
803 
804 	GEM_TRACE("start\n");
805 
806 	/*
807 	 * Before unwedging, make sure that all pending operations
808 	 * are flushed and errored out - we may have requests waiting upon
809 	 * third party fences. We marked all inflight requests as EIO, and
810 	 * every execbuf since returned EIO, for consistency we want all
811 	 * the currently pending requests to also be marked as EIO, which
812 	 * is done inside our nop_submit_request - and so we must wait.
813 	 *
814 	 * No more can be submitted until we reset the wedged bit.
815 	 */
816 	mutex_lock(&timelines->mutex);
817 	list_for_each_entry(tl, &timelines->active_list, link) {
818 		struct i915_request *rq;
819 
820 		rq = i915_active_request_get_unlocked(&tl->last_request);
821 		if (!rq)
822 			continue;
823 
824 		/*
825 		 * All internal dependencies (i915_requests) will have
826 		 * been flushed by the set-wedge, but we may be stuck waiting
827 		 * for external fences. These should all be capped to 10s
828 		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
829 		 * in the worst case.
830 		 */
831 		dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
832 		i915_request_put(rq);
833 	}
834 	mutex_unlock(&timelines->mutex);
835 
836 	intel_gt_sanitize(gt, false);
837 
838 	/*
839 	 * Undo nop_submit_request. We prevent all new i915 requests from
840 	 * being queued (by disallowing execbuf whilst wedged) so having
841 	 * waited for all active requests above, we know the system is idle
842 	 * and do not have to worry about a thread being inside
843 	 * engine->submit_request() as we swap over. So unlike installing
844 	 * the nop_submit_request on reset, we can do this from normal
845 	 * context and do not require stop_machine().
846 	 */
847 	intel_engines_reset_default_submission(gt);
848 
849 	GEM_TRACE("end\n");
850 
851 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
852 	clear_bit(I915_WEDGED, &gt->reset.flags);
853 
854 	return true;
855 }
856 
857 bool intel_gt_unset_wedged(struct intel_gt *gt)
858 {
859 	bool result;
860 
861 	mutex_lock(&gt->reset.mutex);
862 	result = __intel_gt_unset_wedged(gt);
863 	mutex_unlock(&gt->reset.mutex);
864 
865 	return result;
866 }
867 
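/*
 * Revoke userspace mmaps, then perform the full-chip reset, backing off and
 * retrying a few times if the hardware refuses to reset.
 */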
868 static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
869 {
870 	int err, i;
871 
872 	gt_revoke(gt);
873 
874 	err = __intel_gt_reset(gt, ALL_ENGINES);
875 	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
876 		msleep(10 * (i + 1));
877 		err = __intel_gt_reset(gt, ALL_ENGINES);
878 	}
879 	if (err)
880 		return err;
881 
882 	return gt_reset(gt, stalled_mask);
883 }
884 
885 static int resume(struct intel_gt *gt)
886 {
887 	struct intel_engine_cs *engine;
888 	enum intel_engine_id id;
889 	int ret;
890 
891 	for_each_engine(engine, gt->i915, id) {
892 		ret = engine->resume(engine);
893 		if (ret)
894 			return ret;
895 	}
896 
897 	return 0;
898 }
899 
900 /**
901  * intel_gt_reset - reset chip after a hang
902  * @gt: #intel_gt to reset
903  * @stalled_mask: mask of the stalled engines with the guilty requests
904  * @reason: user error message for why we are resetting
905  *
906  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
907  * on failure.
908  *
909  * Procedure is fairly simple:
910  *   - reset the chip using the reset reg
911  *   - re-init context state
912  *   - re-init hardware status page
913  *   - re-init ring buffer
914  *   - re-init interrupt state
915  *   - re-init display
916  */
917 void intel_gt_reset(struct intel_gt *gt,
918 		    intel_engine_mask_t stalled_mask,
919 		    const char *reason)
920 {
921 	intel_engine_mask_t awake;
922 	int ret;
923 
924 	GEM_TRACE("flags=%lx\n", gt->reset.flags);
925 
926 	might_sleep();
927 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
928 	mutex_lock(&gt->reset.mutex);
929 
930 	/* Clear any previous failed attempts at recovery. Time to try again. */
931 	if (!__intel_gt_unset_wedged(gt))
932 		goto unlock;
933 
934 	if (reason)
935 		dev_notice(gt->i915->drm.dev,
936 			   "Resetting chip for %s\n", reason);
937 	atomic_inc(&gt->i915->gpu_error.reset_count);
938 
939 	awake = reset_prepare(gt);
940 
941 	if (!intel_has_gpu_reset(gt->i915)) {
942 		if (i915_modparams.reset)
943 			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
944 		else
945 			DRM_DEBUG_DRIVER("GPU reset disabled\n");
946 		goto error;
947 	}
948 
949 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
950 		intel_runtime_pm_disable_interrupts(gt->i915);
951 
952 	if (do_reset(gt, stalled_mask)) {
953 		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
954 		goto taint;
955 	}
956 
957 	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
958 		intel_runtime_pm_enable_interrupts(gt->i915);
959 
960 	intel_overlay_reset(gt->i915);
961 
962 	/*
963 	 * Next we need to restore the context state, but we don't use it
964 	 * here yet either...
965 	 *
966 	 * Ring buffer needs to be re-initialized in the KMS case, or if X
967 	 * was running at the time of the reset (i.e. we weren't VT
968 	 * switched away).
969 	 */
970 	ret = i915_gem_init_hw(gt->i915);
971 	if (ret) {
972 		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
973 			  ret);
974 		goto taint;
975 	}
976 
977 	ret = resume(gt);
978 	if (ret)
979 		goto taint;
980 
981 	intel_gt_queue_hangcheck(gt);
982 
983 finish:
984 	reset_finish(gt, awake);
985 unlock:
986 	mutex_unlock(&gt->reset.mutex);
987 	return;
988 
989 taint:
990 	/*
991 	 * History tells us that if we cannot reset the GPU now, we
992 	 * never will. This then impacts everything that is run
993 	 * subsequently. On failing the reset, we mark the driver
994 	 * as wedged, preventing further execution on the GPU.
995 	 * We also want to go one step further and add a taint to the
996 	 * kernel so that any subsequent faults can be traced back to
997 	 * this failure. This is important for CI, where if the
998 	 * GPU/driver fails we would like to reboot and restart testing
999 	 * rather than continue on into oblivion. For everyone else,
1000 	 * the system should still plod along, but they have been warned!
1001 	 */
1002 	add_taint_for_CI(TAINT_WARN);
1003 error:
1004 	__intel_gt_set_wedged(gt);
1005 	goto finish;
1006 }
1007 
1008 static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
1009 {
1010 	return __intel_gt_reset(engine->gt, engine->mask);
1011 }
1012 
1013 /**
1014  * intel_engine_reset - reset GPU engine to recover from a hang
1015  * @engine: engine to reset
1016  * @msg: reason for GPU reset; or NULL for no dev_notice()
1017  *
1018  * Reset a specific GPU engine. Useful if a hang is detected.
1019  * Returns zero on successful reset or otherwise an error code.
1020  *
1021  * Procedure is:
1022  *  - identify the request that caused the hang and drop it
1023  *  - reset the engine (which will force the engine to idle)
1024  *  - re-init/configure the engine
1025  */
1026 int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
1027 {
1028 	struct intel_gt *gt = engine->gt;
1029 	int ret;
1030 
1031 	GEM_TRACE("%s flags=%lx\n", engine->name, gt->reset.flags);
1032 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
1033 
1034 	if (!intel_engine_pm_get_if_awake(engine))
1035 		return 0;
1036 
1037 	reset_prepare_engine(engine);
1038 
1039 	if (msg)
1040 		dev_notice(engine->i915->drm.dev,
1041 			   "Resetting %s for %s\n", engine->name, msg);
1042 	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
1043 
1044 	if (!engine->gt->uc.guc.execbuf_client)
1045 		ret = intel_gt_reset_engine(engine);
1046 	else
1047 		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
1048 	if (ret) {
1049 		/* If we fail here, we expect to fallback to a global reset */
1050 		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
1051 				 engine->gt->uc.guc.execbuf_client ? "GuC " : "",
1052 				 engine->name, ret);
1053 		goto out;
1054 	}
1055 
1056 	/*
1057 	 * The request that caused the hang is stuck on the ELSP: we know the
1058 	 * active request and can drop it, then adjust HEAD to skip the
1059 	 * offending request and resume executing the remainder of the queue.
1060 	 */
1061 	__intel_engine_reset(engine, true);
1062 
1063 	/*
1064 	 * The engine and its registers (and workarounds in case of render)
1065 	 * have been reset to their default values. Follow the init_ring
1066 	 * process to program RING_MODE, HWSP and re-enable submission.
1067 	 */
1068 	ret = engine->resume(engine);
1069 
1070 out:
1071 	intel_engine_cancel_stop_cs(engine);
1072 	reset_finish_engine(engine);
1073 	intel_engine_pm_put(engine);
1074 	return ret;
1075 }
1076 
1077 static void intel_gt_reset_global(struct intel_gt *gt,
1078 				  u32 engine_mask,
1079 				  const char *reason)
1080 {
1081 	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
1082 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
1083 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
1084 	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
1085 	struct intel_wedge_me w;
1086 
1087 	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
1088 
1089 	DRM_DEBUG_DRIVER("resetting chip\n");
1090 	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
1091 
1092 	/* Use a watchdog to ensure that our reset completes */
1093 	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
1094 		intel_prepare_reset(gt->i915);
1095 
1096 		/* Flush everyone using a resource about to be clobbered */
1097 		synchronize_srcu_expedited(&gt->reset.backoff_srcu);
1098 
1099 		intel_gt_reset(gt, engine_mask, reason);
1100 
1101 		intel_finish_reset(gt->i915);
1102 	}
1103 
1104 	if (!test_bit(I915_WEDGED, &gt->reset.flags))
1105 		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
1106 }
1107 
1108 /**
1109  * intel_gt_handle_error - handle a gpu error
1110  * @gt: the intel_gt
1111  * @engine_mask: mask representing engines that are hung
1112  * @flags: control flags
1113  * @fmt: Error message format string
1114  *
1115  * Do some basic checking of register state at error time and
1116  * dump it to the syslog.  Also call i915_capture_error_state() to make
1117  * sure we get a record and make it available in debugfs.  Fire a uevent
1118  * so userspace knows something bad happened (should trigger collection
1119  * of a ring dump etc.).
1120  */
1121 void intel_gt_handle_error(struct intel_gt *gt,
1122 			   intel_engine_mask_t engine_mask,
1123 			   unsigned long flags,
1124 			   const char *fmt, ...)
1125 {
1126 	struct intel_engine_cs *engine;
1127 	intel_wakeref_t wakeref;
1128 	intel_engine_mask_t tmp;
1129 	char error_msg[80];
1130 	char *msg = NULL;
1131 
1132 	if (fmt) {
1133 		va_list args;
1134 
1135 		va_start(args, fmt);
1136 		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
1137 		va_end(args);
1138 
1139 		msg = error_msg;
1140 	}
1141 
1142 	/*
1143 	 * In most cases it's guaranteed that we get here with an RPM
1144 	 * reference held, for example because there is a pending GPU
1145 	 * request that won't finish until the reset is done. This
1146 	 * isn't the case at least when we get here by doing a
1147 	 * simulated reset via debugfs, so get an RPM reference.
1148 	 */
1149 	wakeref = intel_runtime_pm_get(&gt->i915->runtime_pm);
1150 
1151 	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
1152 
1153 	if (flags & I915_ERROR_CAPTURE) {
1154 		i915_capture_error_state(gt->i915, engine_mask, msg);
1155 		intel_gt_clear_error_registers(gt, engine_mask);
1156 	}
1157 
1158 	/*
1159 	 * Try engine reset when available. We fall back to full reset if
1160 	 * single reset fails.
1161 	 */
1162 	if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
1163 		for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
1164 			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
1165 			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1166 					     &gt->reset.flags))
1167 				continue;
1168 
1169 			if (intel_engine_reset(engine, msg) == 0)
1170 				engine_mask &= ~engine->mask;
1171 
1172 			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
1173 					      &gt->reset.flags);
1174 		}
1175 	}
1176 
1177 	if (!engine_mask)
1178 		goto out;
1179 
1180 	/* Full reset needs the mutex, stop any other user trying to do so. */
1181 	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
1182 		wait_event(gt->reset.queue,
1183 			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
1184 		goto out; /* piggy-back on the other reset */
1185 	}
1186 
1187 	/* Make sure intel_gt_reset_trylock() sees the I915_RESET_BACKOFF */
1188 	synchronize_rcu_expedited();
1189 
1190 	/* Prevent any other reset-engine attempt. */
1191 	for_each_engine(engine, gt->i915, tmp) {
1192 		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
1193 					&gt->reset.flags))
1194 			wait_on_bit(&gt->reset.flags,
1195 				    I915_RESET_ENGINE + engine->id,
1196 				    TASK_UNINTERRUPTIBLE);
1197 	}
1198 
1199 	intel_gt_reset_global(gt, engine_mask, msg);
1200 
1201 	for_each_engine(engine, gt->i915, tmp)
1202 		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
1203 				 &gt->reset.flags);
1204 	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
1205 	smp_mb__after_atomic();
1206 	wake_up_all(&gt->reset.queue);
1207 
1208 out:
1209 	intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
1210 }
1211 
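/*
 * Take a read lock on the reset backoff SRCU, waiting out any reset that is
 * currently in progress. The returned tag must be handed back to
 * intel_gt_reset_unlock(); while it is held, a global reset will wait for us
 * in intel_gt_reset_global() before clobbering the hardware.
 */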
1212 int intel_gt_reset_trylock(struct intel_gt *gt)
1213 {
1214 	int srcu;
1215 
1216 	might_lock(&gt->reset.backoff_srcu);
1217 	might_sleep();
1218 
1219 	rcu_read_lock();
1220 	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
1221 		rcu_read_unlock();
1222 
1223 		if (wait_event_interruptible(gt->reset.queue,
1224 					     !test_bit(I915_RESET_BACKOFF,
1225 						       &gt->reset.flags)))
1226 			return -EINTR;
1227 
1228 		rcu_read_lock();
1229 	}
1230 	srcu = srcu_read_lock(&gt->reset.backoff_srcu);
1231 	rcu_read_unlock();
1232 
1233 	return srcu;
1234 }
1235 
1236 void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
1237 __releases(&gt->reset.backoff_srcu)
1238 {
1239 	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
1240 }
1241 
1242 int intel_gt_terminally_wedged(struct intel_gt *gt)
1243 {
1244 	might_sleep();
1245 
1246 	if (!intel_gt_is_wedged(gt))
1247 		return 0;
1248 
1249 	/* Reset still in progress? Maybe we will recover? */
1250 	if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
1251 		return -EIO;
1252 
1253 	/* XXX intel_reset_finish() still takes struct_mutex!!! */
1254 	if (mutex_is_locked(&gt->i915->drm.struct_mutex))
1255 		return -EAGAIN;
1256 
1257 	if (wait_event_interruptible(gt->reset.queue,
1258 				     !test_bit(I915_RESET_BACKOFF,
1259 					       &gt->reset.flags)))
1260 		return -EINTR;
1261 
1262 	return intel_gt_is_wedged(gt) ? -EIO : 0;
1263 }
1264 
1265 void intel_gt_init_reset(struct intel_gt *gt)
1266 {
1267 	init_waitqueue_head(&gt->reset.queue);
1268 	mutex_init(&gt->reset.mutex);
1269 	init_srcu_struct(&gt->reset.backoff_srcu);
1270 }
1271 
1272 void intel_gt_fini_reset(struct intel_gt *gt)
1273 {
1274 	cleanup_srcu_struct(&gt->reset.backoff_srcu);
1275 }
1276 
1277 static void intel_wedge_me(struct work_struct *work)
1278 {
1279 	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
1280 
1281 	dev_err(w->gt->i915->drm.dev,
1282 		"%s timed out, cancelling all in-flight rendering.\n",
1283 		w->name);
1284 	intel_gt_set_wedged(w->gt);
1285 }
1286 
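/*
 * Arm the watchdog behind intel_wedge_on_timeout(): if the protected section
 * does not complete within @timeout, the delayed work fires and wedges the
 * GT.
 */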
1287 void __intel_init_wedge(struct intel_wedge_me *w,
1288 			struct intel_gt *gt,
1289 			long timeout,
1290 			const char *name)
1291 {
1292 	w->gt = gt;
1293 	w->name = name;
1294 
1295 	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
1296 	schedule_delayed_work(&w->work, timeout);
1297 }
1298 
1299 void __intel_fini_wedge(struct intel_wedge_me *w)
1300 {
1301 	cancel_delayed_work_sync(&w->work);
1302 	destroy_delayed_work_on_stack(&w->work);
1303 	w->gt = NULL;
1304 }
1305 
1306 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1307 #include "selftest_reset.c"
1308 #endif
1309