/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */

7d897a111SMichal Wajdeczko #ifndef _I915_GPU_ERROR_H_
8d897a111SMichal Wajdeczko #define _I915_GPU_ERROR_H_
9d897a111SMichal Wajdeczko 
10cb823ed9SChris Wilson #include <linux/atomic.h>
11d897a111SMichal Wajdeczko #include <linux/kref.h>
12d897a111SMichal Wajdeczko #include <linux/ktime.h>
13d897a111SMichal Wajdeczko #include <linux/sched.h>
14d897a111SMichal Wajdeczko 
15d897a111SMichal Wajdeczko #include <drm/drm_mm.h>
16d897a111SMichal Wajdeczko 
17112ed2d3SChris Wilson #include "gt/intel_engine.h"
18792592e7SDaniele Ceraolo Spurio #include "gt/intel_gt_types.h"
190f261b24SDaniele Ceraolo Spurio #include "gt/uc/intel_uc_fw.h"
20112ed2d3SChris Wilson 
21d897a111SMichal Wajdeczko #include "intel_device_info.h"
22d897a111SMichal Wajdeczko 
23d897a111SMichal Wajdeczko #include "i915_gem.h"
24d897a111SMichal Wajdeczko #include "i915_gem_gtt.h"
25d897a111SMichal Wajdeczko #include "i915_params.h"
26b7268c5eSChris Wilson #include "i915_scheduler.h"
27d897a111SMichal Wajdeczko 
28d897a111SMichal Wajdeczko struct drm_i915_private;
29742379c0SChris Wilson struct i915_vma_compress;
30742379c0SChris Wilson struct intel_engine_capture_vma;
31d897a111SMichal Wajdeczko struct intel_overlay_error_state;
32d897a111SMichal Wajdeczko 
33742379c0SChris Wilson struct i915_vma_coredump {
34742379c0SChris Wilson 	struct i915_vma_coredump *next;
35d897a111SMichal Wajdeczko 
36742379c0SChris Wilson 	char name[20];
37d897a111SMichal Wajdeczko 
38742379c0SChris Wilson 	u64 gtt_offset;
39742379c0SChris Wilson 	u64 gtt_size;
40742379c0SChris Wilson 	u32 gtt_page_sizes;
41d897a111SMichal Wajdeczko 
42742379c0SChris Wilson 	int unused;
43e45b98baSThomas Hellström 	struct list_head page_list;
44742379c0SChris Wilson };
45d897a111SMichal Wajdeczko 
46742379c0SChris Wilson struct i915_request_coredump {
47742379c0SChris Wilson 	unsigned long flags;
48742379c0SChris Wilson 	pid_t pid;
49742379c0SChris Wilson 	u32 context;
50742379c0SChris Wilson 	u32 seqno;
51742379c0SChris Wilson 	u32 head;
52742379c0SChris Wilson 	u32 tail;
53742379c0SChris Wilson 	struct i915_sched_attr sched_attr;
54742379c0SChris Wilson };
55d897a111SMichal Wajdeczko 
56a6f0f9cfSAlan Previn struct __guc_capture_parsed_output;
57a6f0f9cfSAlan Previn 
58742379c0SChris Wilson struct intel_engine_coredump {
59c990b4c3SChris Wilson 	const struct intel_engine_cs *engine;
60c990b4c3SChris Wilson 
61bda30024STvrtko Ursulin 	bool hung;
62742379c0SChris Wilson 	bool simulated;
63d897a111SMichal Wajdeczko 	u32 reset_count;
64d897a111SMichal Wajdeczko 
65d897a111SMichal Wajdeczko 	/* position of active request inside the ring */
66d897a111SMichal Wajdeczko 	u32 rq_head, rq_post, rq_tail;
67d897a111SMichal Wajdeczko 
68d897a111SMichal Wajdeczko 	/* Register state */
69742379c0SChris Wilson 	u32 ccid;
70d897a111SMichal Wajdeczko 	u32 start;
71d897a111SMichal Wajdeczko 	u32 tail;
72d897a111SMichal Wajdeczko 	u32 head;
73d897a111SMichal Wajdeczko 	u32 ctl;
74d897a111SMichal Wajdeczko 	u32 mode;
75d897a111SMichal Wajdeczko 	u32 hws;
76d897a111SMichal Wajdeczko 	u32 ipeir;
77d897a111SMichal Wajdeczko 	u32 ipehr;
7870a76a9bSChris Wilson 	u32 esr;
79d897a111SMichal Wajdeczko 	u32 bbstate;
80d897a111SMichal Wajdeczko 	u32 instpm;
81d897a111SMichal Wajdeczko 	u32 instps;
82d897a111SMichal Wajdeczko 	u64 bbaddr;
83d897a111SMichal Wajdeczko 	u64 acthd;
84d897a111SMichal Wajdeczko 	u32 fault_reg;
85d897a111SMichal Wajdeczko 	u64 faddr;
86d897a111SMichal Wajdeczko 	u32 rc_psmi; /* sleep state */
87d897a111SMichal Wajdeczko 	struct intel_instdone instdone;
88d897a111SMichal Wajdeczko 
89a6f0f9cfSAlan Previn 	/* GuC matched capture-lists info */
90a6f0f9cfSAlan Previn 	struct intel_guc_state_capture *capture;
91a6f0f9cfSAlan Previn 	struct __guc_capture_parsed_output *guc_capture_node;
92a6f0f9cfSAlan Previn 
93742379c0SChris Wilson 	struct i915_gem_context_coredump {
94d897a111SMichal Wajdeczko 		char comm[TASK_COMM_LEN];
951883a0a4STvrtko Ursulin 
961883a0a4STvrtko Ursulin 		u64 total_runtime;
97*bb6287cbSTvrtko Ursulin 		u64 avg_runtime;
981883a0a4STvrtko Ursulin 
99d897a111SMichal Wajdeczko 		pid_t pid;
100d897a111SMichal Wajdeczko 		int active;
101d897a111SMichal Wajdeczko 		int guilty;
102b7268c5eSChris Wilson 		struct i915_sched_attr sched_attr;
103d897a111SMichal Wajdeczko 	} context;
104d897a111SMichal Wajdeczko 
105742379c0SChris Wilson 	struct i915_vma_coredump *vma;
106d897a111SMichal Wajdeczko 
1071a8585bdSChris Wilson 	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
108d897a111SMichal Wajdeczko 	unsigned int num_ports;
109d897a111SMichal Wajdeczko 
110d897a111SMichal Wajdeczko 	struct {
111d897a111SMichal Wajdeczko 		u32 gfx_mode;
112d897a111SMichal Wajdeczko 		union {
113d897a111SMichal Wajdeczko 			u64 pdp[4];
114d897a111SMichal Wajdeczko 			u32 pp_dir_base;
115d897a111SMichal Wajdeczko 		};
116d897a111SMichal Wajdeczko 	} vm_info;
117c990b4c3SChris Wilson 
118742379c0SChris Wilson 	struct intel_engine_coredump *next;
119742379c0SChris Wilson };
120742379c0SChris Wilson 
121742379c0SChris Wilson struct intel_gt_coredump {
122742379c0SChris Wilson 	const struct intel_gt *_gt;
123742379c0SChris Wilson 	bool awake;
124742379c0SChris Wilson 	bool simulated;
125742379c0SChris Wilson 
126792592e7SDaniele Ceraolo Spurio 	struct intel_gt_info info;
127792592e7SDaniele Ceraolo Spurio 
128742379c0SChris Wilson 	/* Generic register state */
129742379c0SChris Wilson 	u32 eir;
130742379c0SChris Wilson 	u32 pgtbl_er;
131742379c0SChris Wilson 	u32 ier;
132742379c0SChris Wilson 	u32 gtier[6], ngtier;
133742379c0SChris Wilson 	u32 forcewake;
134742379c0SChris Wilson 	u32 error; /* gen6+ */
135742379c0SChris Wilson 	u32 err_int; /* gen7 */
136742379c0SChris Wilson 	u32 fault_data0; /* gen8, gen9 */
137742379c0SChris Wilson 	u32 fault_data1; /* gen8, gen9 */
138742379c0SChris Wilson 	u32 done_reg;
139742379c0SChris Wilson 	u32 gac_eco;
140742379c0SChris Wilson 	u32 gam_ecochk;
141742379c0SChris Wilson 	u32 gab_ctl;
142742379c0SChris Wilson 	u32 gfx_mode;
143742379c0SChris Wilson 	u32 gtt_cache;
144742379c0SChris Wilson 	u32 aux_err; /* gen12 */
145742379c0SChris Wilson 	u32 gam_done; /* gen12 */
146742379c0SChris Wilson 
147a6f0f9cfSAlan Previn 	/* Display related */
148a6f0f9cfSAlan Previn 	u32 derrmr;
149a6f0f9cfSAlan Previn 	u32 sfc_done[I915_MAX_SFC]; /* gen12 */
150a6f0f9cfSAlan Previn 
151742379c0SChris Wilson 	u32 nfence;
152742379c0SChris Wilson 	u64 fence[I915_MAX_NUM_FENCES];
153742379c0SChris Wilson 
154742379c0SChris Wilson 	struct intel_engine_coredump *engine;
155742379c0SChris Wilson 
156742379c0SChris Wilson 	struct intel_uc_coredump {
157742379c0SChris Wilson 		struct intel_uc_fw guc_fw;
158742379c0SChris Wilson 		struct intel_uc_fw huc_fw;
159742379c0SChris Wilson 		struct i915_vma_coredump *guc_log;
160a6f0f9cfSAlan Previn 		bool is_guc_capture;
161742379c0SChris Wilson 	} *uc;
162742379c0SChris Wilson 
163742379c0SChris Wilson 	struct intel_gt_coredump *next;
164742379c0SChris Wilson };
165742379c0SChris Wilson 
166742379c0SChris Wilson struct i915_gpu_coredump {
167742379c0SChris Wilson 	struct kref ref;
168742379c0SChris Wilson 	ktime_t time;
169742379c0SChris Wilson 	ktime_t boottime;
170742379c0SChris Wilson 	ktime_t uptime;
171742379c0SChris Wilson 	unsigned long capture;
172742379c0SChris Wilson 
173742379c0SChris Wilson 	struct drm_i915_private *i915;
174742379c0SChris Wilson 
175742379c0SChris Wilson 	struct intel_gt_coredump *gt;
176742379c0SChris Wilson 
177742379c0SChris Wilson 	char error_msg[128];
178742379c0SChris Wilson 	bool simulated;
179742379c0SChris Wilson 	bool wakelock;
180742379c0SChris Wilson 	bool suspended;
181742379c0SChris Wilson 	int iommu;
182742379c0SChris Wilson 	u32 reset_count;
183742379c0SChris Wilson 	u32 suspend_count;
184742379c0SChris Wilson 
185742379c0SChris Wilson 	struct intel_device_info device_info;
186742379c0SChris Wilson 	struct intel_runtime_info runtime_info;
187742379c0SChris Wilson 	struct intel_driver_caps driver_caps;
188742379c0SChris Wilson 	struct i915_params params;
189742379c0SChris Wilson 
190742379c0SChris Wilson 	struct intel_overlay_error_state *overlay;
191d897a111SMichal Wajdeczko 
1920e39037bSChris Wilson 	struct scatterlist *sgl, *fit;
193d897a111SMichal Wajdeczko };
194d897a111SMichal Wajdeczko 
195d897a111SMichal Wajdeczko struct i915_gpu_error {
196d897a111SMichal Wajdeczko 	/* For reset and error_state handling. */
197d897a111SMichal Wajdeczko 	spinlock_t lock;
198d897a111SMichal Wajdeczko 	/* Protected by the above dev->gpu_error.lock. */
199742379c0SChris Wilson 	struct i915_gpu_coredump *first_error;
200d897a111SMichal Wajdeczko 
201d897a111SMichal Wajdeczko 	atomic_t pending_fb_pin;
202d897a111SMichal Wajdeczko 
2032caffbf1SChris Wilson 	/** Number of times the device has been reset (global) */
204cb823ed9SChris Wilson 	atomic_t reset_count;
2052caffbf1SChris Wilson 
206d897a111SMichal Wajdeczko 	/** Number of times an engine has been reset */
207cb823ed9SChris Wilson 	atomic_t reset_engine_count[I915_NUM_ENGINES];
208d897a111SMichal Wajdeczko };
209d897a111SMichal Wajdeczko 
210d897a111SMichal Wajdeczko struct drm_i915_error_state_buf {
211d897a111SMichal Wajdeczko 	struct drm_i915_private *i915;
2120e39037bSChris Wilson 	struct scatterlist *sgl, *cur, *end;
2130e39037bSChris Wilson 
2140e39037bSChris Wilson 	char *buf;
2150e39037bSChris Wilson 	size_t bytes;
2160e39037bSChris Wilson 	size_t size;
2170e39037bSChris Wilson 	loff_t iter;
2180e39037bSChris Wilson 
219d897a111SMichal Wajdeczko 	int err;
220d897a111SMichal Wajdeczko };
221d897a111SMichal Wajdeczko 
222f9bf77dfSJani Nikula static inline u32 i915_reset_count(struct i915_gpu_error *error)
223f9bf77dfSJani Nikula {
224f9bf77dfSJani Nikula 	return atomic_read(&error->reset_count);
225f9bf77dfSJani Nikula }
226f9bf77dfSJani Nikula 
227f9bf77dfSJani Nikula static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
228f9bf77dfSJani Nikula 					  const struct intel_engine_cs *engine)
229f9bf77dfSJani Nikula {
230f9bf77dfSJani Nikula 	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
231f9bf77dfSJani Nikula }
232f9bf77dfSJani Nikula 
/*
 * Coredump flag bits.
 * NOTE(review): presumably the values taken by the u32 dump_flags parameters
 * of the capture/alloc functions in this header - confirm against callers.
 */
#define CORE_DUMP_FLAG_NONE           0x0
#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)

236d897a111SMichal Wajdeczko #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
237d897a111SMichal Wajdeczko 
238d897a111SMichal Wajdeczko __printf(2, 3)
239d897a111SMichal Wajdeczko void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
240a0f1f7b4SAlan Previn void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m,
241a0f1f7b4SAlan Previn 			       const struct intel_engine_cs *engine,
242a0f1f7b4SAlan Previn 			       const struct i915_vma_coredump *vma);
243a0f1f7b4SAlan Previn struct i915_vma_coredump *
244a0f1f7b4SAlan Previn intel_gpu_error_find_batch(const struct intel_engine_coredump *ee);
245d897a111SMichal Wajdeczko 
246bda30024STvrtko Ursulin struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt,
247a6f0f9cfSAlan Previn 					    intel_engine_mask_t engine_mask, u32 dump_flags);
248bda30024STvrtko Ursulin void i915_capture_error_state(struct intel_gt *gt,
249a6f0f9cfSAlan Previn 			      intel_engine_mask_t engine_mask, u32 dump_flags);
250d897a111SMichal Wajdeczko 
251742379c0SChris Wilson struct i915_gpu_coredump *
252742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
253742379c0SChris Wilson 
254742379c0SChris Wilson struct intel_gt_coredump *
255a6f0f9cfSAlan Previn intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
256742379c0SChris Wilson 
257742379c0SChris Wilson struct intel_engine_coredump *
258a6f0f9cfSAlan Previn intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
259742379c0SChris Wilson 
260742379c0SChris Wilson struct intel_engine_capture_vma *
261742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
262742379c0SChris Wilson 				  struct i915_request *rq,
263742379c0SChris Wilson 				  gfp_t gfp);
264742379c0SChris Wilson 
265742379c0SChris Wilson void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
266742379c0SChris Wilson 				   struct intel_engine_capture_vma *capture,
267742379c0SChris Wilson 				   struct i915_vma_compress *compress);
268742379c0SChris Wilson 
269742379c0SChris Wilson struct i915_vma_compress *
270742379c0SChris Wilson i915_vma_capture_prepare(struct intel_gt_coredump *gt);
271742379c0SChris Wilson 
272742379c0SChris Wilson void i915_vma_capture_finish(struct intel_gt_coredump *gt,
273742379c0SChris Wilson 			     struct i915_vma_compress *compress);
274742379c0SChris Wilson 
275742379c0SChris Wilson void i915_error_state_store(struct i915_gpu_coredump *error);
276742379c0SChris Wilson 
277742379c0SChris Wilson static inline struct i915_gpu_coredump *
278742379c0SChris Wilson i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
279d897a111SMichal Wajdeczko {
280d897a111SMichal Wajdeczko 	kref_get(&gpu->ref);
281d897a111SMichal Wajdeczko 	return gpu;
282d897a111SMichal Wajdeczko }
283d897a111SMichal Wajdeczko 
284742379c0SChris Wilson ssize_t
285742379c0SChris Wilson i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
2860e39037bSChris Wilson 				 char *buf, loff_t offset, size_t count);
2870e39037bSChris Wilson 
288742379c0SChris Wilson void __i915_gpu_coredump_free(struct kref *kref);
289742379c0SChris Wilson static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
290d897a111SMichal Wajdeczko {
291d897a111SMichal Wajdeczko 	if (gpu)
292742379c0SChris Wilson 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
293d897a111SMichal Wajdeczko }
294d897a111SMichal Wajdeczko 
295742379c0SChris Wilson struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
296d897a111SMichal Wajdeczko void i915_reset_error_state(struct drm_i915_private *i915);
297fb6f0b64SChris Wilson void i915_disable_error_state(struct drm_i915_private *i915, int err);
298d897a111SMichal Wajdeczko 
299d897a111SMichal Wajdeczko #else
300d897a111SMichal Wajdeczko 
301bda30024STvrtko Ursulin static inline void
302a6f0f9cfSAlan Previn i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
303d897a111SMichal Wajdeczko {
304d897a111SMichal Wajdeczko }
305d897a111SMichal Wajdeczko 
306742379c0SChris Wilson static inline struct i915_gpu_coredump *
307742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
308742379c0SChris Wilson {
309742379c0SChris Wilson 	return NULL;
310742379c0SChris Wilson }
311742379c0SChris Wilson 
312742379c0SChris Wilson static inline struct intel_gt_coredump *
313a6f0f9cfSAlan Previn intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
314742379c0SChris Wilson {
315742379c0SChris Wilson 	return NULL;
316742379c0SChris Wilson }
317742379c0SChris Wilson 
318742379c0SChris Wilson static inline struct intel_engine_coredump *
319a6f0f9cfSAlan Previn intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
320742379c0SChris Wilson {
321742379c0SChris Wilson 	return NULL;
322742379c0SChris Wilson }
323742379c0SChris Wilson 
324742379c0SChris Wilson static inline struct intel_engine_capture_vma *
325742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
326742379c0SChris Wilson 				  struct i915_request *rq,
327742379c0SChris Wilson 				  gfp_t gfp)
328742379c0SChris Wilson {
329742379c0SChris Wilson 	return NULL;
330742379c0SChris Wilson }
331742379c0SChris Wilson 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: ignores its arguments. */
static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: always returns NULL. */
static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: ignores its arguments. */
static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing with @error. */
static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @gpu is not touched. */
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}

360742379c0SChris Wilson static inline struct i915_gpu_coredump *
361d897a111SMichal Wajdeczko i915_first_error_state(struct drm_i915_private *i915)
362d897a111SMichal Wajdeczko {
363fb6f0b64SChris Wilson 	return ERR_PTR(-ENODEV);
364d897a111SMichal Wajdeczko }
365d897a111SMichal Wajdeczko 
/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing. */
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}

/* Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: @err is ignored. */
static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}

375d897a111SMichal Wajdeczko #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
376d897a111SMichal Wajdeczko 
377d897a111SMichal Wajdeczko #endif /* _I915_GPU_ERROR_H_ */
378