/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */
6d897a111SMichal Wajdeczko 
7d897a111SMichal Wajdeczko #ifndef _I915_GPU_ERROR_H_
8d897a111SMichal Wajdeczko #define _I915_GPU_ERROR_H_
9d897a111SMichal Wajdeczko 
10cb823ed9SChris Wilson #include <linux/atomic.h>
11d897a111SMichal Wajdeczko #include <linux/kref.h>
12d897a111SMichal Wajdeczko #include <linux/ktime.h>
13d897a111SMichal Wajdeczko #include <linux/sched.h>
14d897a111SMichal Wajdeczko 
15d897a111SMichal Wajdeczko #include <drm/drm_mm.h>
16d897a111SMichal Wajdeczko 
17112ed2d3SChris Wilson #include "gt/intel_engine.h"
180f261b24SDaniele Ceraolo Spurio #include "gt/uc/intel_uc_fw.h"
19112ed2d3SChris Wilson 
20d897a111SMichal Wajdeczko #include "intel_device_info.h"
21d897a111SMichal Wajdeczko 
22d897a111SMichal Wajdeczko #include "i915_gem.h"
23d897a111SMichal Wajdeczko #include "i915_gem_gtt.h"
24d897a111SMichal Wajdeczko #include "i915_params.h"
25b7268c5eSChris Wilson #include "i915_scheduler.h"
26d897a111SMichal Wajdeczko 
27d897a111SMichal Wajdeczko struct drm_i915_private;
28742379c0SChris Wilson struct i915_vma_compress;
29742379c0SChris Wilson struct intel_engine_capture_vma;
30d897a111SMichal Wajdeczko struct intel_overlay_error_state;
31d897a111SMichal Wajdeczko struct intel_display_error_state;
32d897a111SMichal Wajdeczko 
33742379c0SChris Wilson struct i915_vma_coredump {
34742379c0SChris Wilson 	struct i915_vma_coredump *next;
35d897a111SMichal Wajdeczko 
36742379c0SChris Wilson 	char name[20];
37d897a111SMichal Wajdeczko 
38742379c0SChris Wilson 	u64 gtt_offset;
39742379c0SChris Wilson 	u64 gtt_size;
40742379c0SChris Wilson 	u32 gtt_page_sizes;
41d897a111SMichal Wajdeczko 
42742379c0SChris Wilson 	int num_pages;
43742379c0SChris Wilson 	int page_count;
44742379c0SChris Wilson 	int unused;
45742379c0SChris Wilson 	u32 *pages[0];
46742379c0SChris Wilson };
47d897a111SMichal Wajdeczko 
48742379c0SChris Wilson struct i915_request_coredump {
49742379c0SChris Wilson 	unsigned long flags;
50742379c0SChris Wilson 	pid_t pid;
51742379c0SChris Wilson 	u32 context;
52742379c0SChris Wilson 	u32 seqno;
53742379c0SChris Wilson 	u32 start;
54742379c0SChris Wilson 	u32 head;
55742379c0SChris Wilson 	u32 tail;
56742379c0SChris Wilson 	struct i915_sched_attr sched_attr;
57742379c0SChris Wilson };
58d897a111SMichal Wajdeczko 
59742379c0SChris Wilson struct intel_engine_coredump {
60c990b4c3SChris Wilson 	const struct intel_engine_cs *engine;
61c990b4c3SChris Wilson 
62742379c0SChris Wilson 	bool simulated;
63d897a111SMichal Wajdeczko 	int num_requests;
64d897a111SMichal Wajdeczko 	u32 reset_count;
65d897a111SMichal Wajdeczko 
66d897a111SMichal Wajdeczko 	/* position of active request inside the ring */
67d897a111SMichal Wajdeczko 	u32 rq_head, rq_post, rq_tail;
68d897a111SMichal Wajdeczko 
69d897a111SMichal Wajdeczko 	/* our own tracking of ring head and tail */
70d897a111SMichal Wajdeczko 	u32 cpu_ring_head;
71d897a111SMichal Wajdeczko 	u32 cpu_ring_tail;
72d897a111SMichal Wajdeczko 
73d897a111SMichal Wajdeczko 	/* Register state */
74742379c0SChris Wilson 	u32 ccid;
75d897a111SMichal Wajdeczko 	u32 start;
76d897a111SMichal Wajdeczko 	u32 tail;
77d897a111SMichal Wajdeczko 	u32 head;
78d897a111SMichal Wajdeczko 	u32 ctl;
79d897a111SMichal Wajdeczko 	u32 mode;
80d897a111SMichal Wajdeczko 	u32 hws;
81d897a111SMichal Wajdeczko 	u32 ipeir;
82d897a111SMichal Wajdeczko 	u32 ipehr;
83d897a111SMichal Wajdeczko 	u32 bbstate;
84d897a111SMichal Wajdeczko 	u32 instpm;
85d897a111SMichal Wajdeczko 	u32 instps;
86d897a111SMichal Wajdeczko 	u64 bbaddr;
87d897a111SMichal Wajdeczko 	u64 acthd;
88d897a111SMichal Wajdeczko 	u32 fault_reg;
89d897a111SMichal Wajdeczko 	u64 faddr;
90d897a111SMichal Wajdeczko 	u32 rc_psmi; /* sleep state */
91d897a111SMichal Wajdeczko 	struct intel_instdone instdone;
92d897a111SMichal Wajdeczko 
93742379c0SChris Wilson 	struct i915_gem_context_coredump {
94d897a111SMichal Wajdeczko 		char comm[TASK_COMM_LEN];
95d897a111SMichal Wajdeczko 		pid_t pid;
96d897a111SMichal Wajdeczko 		int active;
97d897a111SMichal Wajdeczko 		int guilty;
98b7268c5eSChris Wilson 		struct i915_sched_attr sched_attr;
99d897a111SMichal Wajdeczko 	} context;
100d897a111SMichal Wajdeczko 
101742379c0SChris Wilson 	struct i915_vma_coredump *vma;
102d897a111SMichal Wajdeczko 
103742379c0SChris Wilson 	struct i915_request_coredump *requests, execlist[EXECLIST_MAX_PORTS];
104d897a111SMichal Wajdeczko 	unsigned int num_ports;
105d897a111SMichal Wajdeczko 
106d897a111SMichal Wajdeczko 	struct {
107d897a111SMichal Wajdeczko 		u32 gfx_mode;
108d897a111SMichal Wajdeczko 		union {
109d897a111SMichal Wajdeczko 			u64 pdp[4];
110d897a111SMichal Wajdeczko 			u32 pp_dir_base;
111d897a111SMichal Wajdeczko 		};
112d897a111SMichal Wajdeczko 	} vm_info;
113c990b4c3SChris Wilson 
114742379c0SChris Wilson 	struct intel_engine_coredump *next;
115742379c0SChris Wilson };
116742379c0SChris Wilson 
117742379c0SChris Wilson struct intel_gt_coredump {
118742379c0SChris Wilson 	const struct intel_gt *_gt;
119742379c0SChris Wilson 	bool awake;
120742379c0SChris Wilson 	bool simulated;
121742379c0SChris Wilson 
122742379c0SChris Wilson 	/* Generic register state */
123742379c0SChris Wilson 	u32 eir;
124742379c0SChris Wilson 	u32 pgtbl_er;
125742379c0SChris Wilson 	u32 ier;
126742379c0SChris Wilson 	u32 gtier[6], ngtier;
127742379c0SChris Wilson 	u32 derrmr;
128742379c0SChris Wilson 	u32 forcewake;
129742379c0SChris Wilson 	u32 error; /* gen6+ */
130742379c0SChris Wilson 	u32 err_int; /* gen7 */
131742379c0SChris Wilson 	u32 fault_data0; /* gen8, gen9 */
132742379c0SChris Wilson 	u32 fault_data1; /* gen8, gen9 */
133742379c0SChris Wilson 	u32 done_reg;
134742379c0SChris Wilson 	u32 gac_eco;
135742379c0SChris Wilson 	u32 gam_ecochk;
136742379c0SChris Wilson 	u32 gab_ctl;
137742379c0SChris Wilson 	u32 gfx_mode;
138742379c0SChris Wilson 	u32 gtt_cache;
139742379c0SChris Wilson 	u32 aux_err; /* gen12 */
140742379c0SChris Wilson 	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
141742379c0SChris Wilson 	u32 gam_done; /* gen12 */
142742379c0SChris Wilson 
143742379c0SChris Wilson 	u32 nfence;
144742379c0SChris Wilson 	u64 fence[I915_MAX_NUM_FENCES];
145742379c0SChris Wilson 
146742379c0SChris Wilson 	struct intel_engine_coredump *engine;
147742379c0SChris Wilson 
148742379c0SChris Wilson 	struct intel_uc_coredump {
149742379c0SChris Wilson 		struct intel_uc_fw guc_fw;
150742379c0SChris Wilson 		struct intel_uc_fw huc_fw;
151742379c0SChris Wilson 		struct i915_vma_coredump *guc_log;
152742379c0SChris Wilson 	} *uc;
153742379c0SChris Wilson 
154742379c0SChris Wilson 	struct intel_gt_coredump *next;
155742379c0SChris Wilson };
156742379c0SChris Wilson 
157742379c0SChris Wilson struct i915_gpu_coredump {
158742379c0SChris Wilson 	struct kref ref;
159742379c0SChris Wilson 	ktime_t time;
160742379c0SChris Wilson 	ktime_t boottime;
161742379c0SChris Wilson 	ktime_t uptime;
162742379c0SChris Wilson 	unsigned long capture;
163742379c0SChris Wilson 
164742379c0SChris Wilson 	struct drm_i915_private *i915;
165742379c0SChris Wilson 
166742379c0SChris Wilson 	struct intel_gt_coredump *gt;
167742379c0SChris Wilson 
168742379c0SChris Wilson 	char error_msg[128];
169742379c0SChris Wilson 	bool simulated;
170742379c0SChris Wilson 	bool wakelock;
171742379c0SChris Wilson 	bool suspended;
172742379c0SChris Wilson 	int iommu;
173742379c0SChris Wilson 	u32 reset_count;
174742379c0SChris Wilson 	u32 suspend_count;
175742379c0SChris Wilson 
176742379c0SChris Wilson 	struct intel_device_info device_info;
177742379c0SChris Wilson 	struct intel_runtime_info runtime_info;
178742379c0SChris Wilson 	struct intel_driver_caps driver_caps;
179742379c0SChris Wilson 	struct i915_params params;
180742379c0SChris Wilson 
181742379c0SChris Wilson 	struct intel_overlay_error_state *overlay;
182742379c0SChris Wilson 	struct intel_display_error_state *display;
183d897a111SMichal Wajdeczko 
1840e39037bSChris Wilson 	struct scatterlist *sgl, *fit;
185d897a111SMichal Wajdeczko };
186d897a111SMichal Wajdeczko 
187d897a111SMichal Wajdeczko struct i915_gpu_error {
188d897a111SMichal Wajdeczko 	/* For reset and error_state handling. */
189d897a111SMichal Wajdeczko 	spinlock_t lock;
190d897a111SMichal Wajdeczko 	/* Protected by the above dev->gpu_error.lock. */
191742379c0SChris Wilson 	struct i915_gpu_coredump *first_error;
192d897a111SMichal Wajdeczko 
193d897a111SMichal Wajdeczko 	atomic_t pending_fb_pin;
194d897a111SMichal Wajdeczko 
1952caffbf1SChris Wilson 	/** Number of times the device has been reset (global) */
196cb823ed9SChris Wilson 	atomic_t reset_count;
1972caffbf1SChris Wilson 
198d897a111SMichal Wajdeczko 	/** Number of times an engine has been reset */
199cb823ed9SChris Wilson 	atomic_t reset_engine_count[I915_NUM_ENGINES];
200d897a111SMichal Wajdeczko };
201d897a111SMichal Wajdeczko 
202d897a111SMichal Wajdeczko struct drm_i915_error_state_buf {
203d897a111SMichal Wajdeczko 	struct drm_i915_private *i915;
2040e39037bSChris Wilson 	struct scatterlist *sgl, *cur, *end;
2050e39037bSChris Wilson 
2060e39037bSChris Wilson 	char *buf;
2070e39037bSChris Wilson 	size_t bytes;
2080e39037bSChris Wilson 	size_t size;
2090e39037bSChris Wilson 	loff_t iter;
2100e39037bSChris Wilson 
211d897a111SMichal Wajdeczko 	int err;
212d897a111SMichal Wajdeczko };
213d897a111SMichal Wajdeczko 
214d897a111SMichal Wajdeczko #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
215d897a111SMichal Wajdeczko 
216d897a111SMichal Wajdeczko __printf(2, 3)
217d897a111SMichal Wajdeczko void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
218d897a111SMichal Wajdeczko 
219742379c0SChris Wilson struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
220742379c0SChris Wilson void i915_capture_error_state(struct drm_i915_private *dev_priv);
221d897a111SMichal Wajdeczko 
222742379c0SChris Wilson struct i915_gpu_coredump *
223742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
224742379c0SChris Wilson 
225742379c0SChris Wilson struct intel_gt_coredump *
226742379c0SChris Wilson intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);
227742379c0SChris Wilson 
228742379c0SChris Wilson struct intel_engine_coredump *
229742379c0SChris Wilson intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);
230742379c0SChris Wilson 
231742379c0SChris Wilson struct intel_engine_capture_vma *
232742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
233742379c0SChris Wilson 				  struct i915_request *rq,
234742379c0SChris Wilson 				  gfp_t gfp);
235742379c0SChris Wilson 
236742379c0SChris Wilson void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
237742379c0SChris Wilson 				   struct intel_engine_capture_vma *capture,
238742379c0SChris Wilson 				   struct i915_vma_compress *compress);
239742379c0SChris Wilson 
240742379c0SChris Wilson struct i915_vma_compress *
241742379c0SChris Wilson i915_vma_capture_prepare(struct intel_gt_coredump *gt);
242742379c0SChris Wilson 
243742379c0SChris Wilson void i915_vma_capture_finish(struct intel_gt_coredump *gt,
244742379c0SChris Wilson 			     struct i915_vma_compress *compress);
245742379c0SChris Wilson 
246742379c0SChris Wilson void i915_error_state_store(struct i915_gpu_coredump *error);
247742379c0SChris Wilson 
248742379c0SChris Wilson static inline struct i915_gpu_coredump *
249742379c0SChris Wilson i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
250d897a111SMichal Wajdeczko {
251d897a111SMichal Wajdeczko 	kref_get(&gpu->ref);
252d897a111SMichal Wajdeczko 	return gpu;
253d897a111SMichal Wajdeczko }
254d897a111SMichal Wajdeczko 
255742379c0SChris Wilson ssize_t
256742379c0SChris Wilson i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
2570e39037bSChris Wilson 				 char *buf, loff_t offset, size_t count);
2580e39037bSChris Wilson 
259742379c0SChris Wilson void __i915_gpu_coredump_free(struct kref *kref);
260742379c0SChris Wilson static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
261d897a111SMichal Wajdeczko {
262d897a111SMichal Wajdeczko 	if (gpu)
263742379c0SChris Wilson 		kref_put(&gpu->ref, __i915_gpu_coredump_free);
264d897a111SMichal Wajdeczko }
265d897a111SMichal Wajdeczko 
/*
 * Access to and control of the recorded error state.  When
 * CONFIG_DRM_I915_CAPTURE_ERROR is disabled these are replaced by inline
 * stubs, and i915_first_error_state() returns ERR_PTR(-ENODEV).
 */
struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915);

void i915_reset_error_state(struct drm_i915_private *i915);

void i915_disable_error_state(struct drm_i915_private *i915, int err);
269d897a111SMichal Wajdeczko 
270d897a111SMichal Wajdeczko #else
271d897a111SMichal Wajdeczko 
/*
 * No-op stand-in for i915_capture_error_state() when
 * CONFIG_DRM_I915_CAPTURE_ERROR is disabled.
 *
 * The stub takes the same single argument as the real prototype so that
 * callers compile identically in both configurations.
 */
static inline void i915_capture_error_state(struct drm_i915_private *dev_priv)
{
}
277d897a111SMichal Wajdeczko 
278742379c0SChris Wilson static inline struct i915_gpu_coredump *
279742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
280742379c0SChris Wilson {
281742379c0SChris Wilson 	return NULL;
282742379c0SChris Wilson }
283742379c0SChris Wilson 
284742379c0SChris Wilson static inline struct intel_gt_coredump *
285742379c0SChris Wilson intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
286742379c0SChris Wilson {
287742379c0SChris Wilson 	return NULL;
288742379c0SChris Wilson }
289742379c0SChris Wilson 
290742379c0SChris Wilson static inline struct intel_engine_coredump *
291742379c0SChris Wilson intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
292742379c0SChris Wilson {
293742379c0SChris Wilson 	return NULL;
294742379c0SChris Wilson }
295742379c0SChris Wilson 
296742379c0SChris Wilson static inline struct intel_engine_capture_vma *
297742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
298742379c0SChris Wilson 				  struct i915_request *rq,
299742379c0SChris Wilson 				  gfp_t gfp)
300742379c0SChris Wilson {
301742379c0SChris Wilson 	return NULL;
302742379c0SChris Wilson }
303742379c0SChris Wilson 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing and
 * ignores all of its arguments.
 */
static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}
310742379c0SChris Wilson 
311742379c0SChris Wilson static inline struct i915_vma_compress *
312742379c0SChris Wilson i915_vma_compress_prepare(struct intel_gt_coredump *gt)
313742379c0SChris Wilson {
314742379c0SChris Wilson 	return NULL;
315742379c0SChris Wilson }
316742379c0SChris Wilson 
/*
 * Stub for i915_vma_capture_finish() when CONFIG_DRM_I915_CAPTURE_ERROR
 * is disabled: does nothing and ignores both arguments.
 *
 * Name and parameters mirror the prototype declared for the enabled
 * configuration.  It is static inline because a plain function definition
 * in a header would be emitted in every translation unit that includes it.
 */
static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}
320742379c0SChris Wilson 
/*
 * Stub for i915_error_state_store() when CONFIG_DRM_I915_CAPTURE_ERROR is
 * disabled: the coredump is not stored anywhere.
 *
 * Takes only @error, exactly like the prototype declared for the enabled
 * configuration, so call sites build in both configurations.
 */
static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}
326742379c0SChris Wilson 
327742379c0SChris Wilson static inline struct i915_gpu_coredump *
328d897a111SMichal Wajdeczko i915_first_error_state(struct drm_i915_private *i915)
329d897a111SMichal Wajdeczko {
330fb6f0b64SChris Wilson 	return ERR_PTR(-ENODEV);
331d897a111SMichal Wajdeczko }
332d897a111SMichal Wajdeczko 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing and
 * ignores @i915.
 */
static inline void
i915_reset_error_state(struct drm_i915_private *i915)
{
}
336d897a111SMichal Wajdeczko 
/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: does nothing;
 * both @i915 and @err are ignored.
 */
static inline void
i915_disable_error_state(struct drm_i915_private *i915, int err)
{
}
341fb6f0b64SChris Wilson 
342d897a111SMichal Wajdeczko #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
343d897a111SMichal Wajdeczko 
344d897a111SMichal Wajdeczko #endif /* _I915_GPU_ERROR_H_ */
345