/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */

#ifndef _I915_GPU_ERROR_H_
#define _I915_GPU_ERROR_H_

#include <linux/atomic.h>
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/sched.h>

#include <drm/drm_mm.h>

#include "gt/intel_engine.h"
#include "gt/intel_gt_types.h"
#include "gt/uc/intel_uc_fw.h"

#include "intel_device_info.h"

#include "i915_gem.h"
#include "i915_gem_gtt.h"
#include "i915_params.h"
#include "i915_scheduler.h"

struct drm_i915_private;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
struct intel_display_error_state;

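/*
 * Snapshot of a single VMA taken at error-capture time: its name, GTT
 * placement and a copy of its backing pages, chained to further captures
 * via @next.
 */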
struct i915_vma_coredump {
	struct i915_vma_coredump *next;

	char name[20];

	u64 gtt_offset;
	u64 gtt_size;
	u32 gtt_page_sizes;

	int num_pages;
	int page_count;
	int unused;
	u32 *pages[];
};

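/*
 * Snapshot of a request as found in the execlist ports at error time: the
 * owning pid, context id, seqno, its head/tail position in the ring and
 * its scheduling attributes.
 */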
struct i915_request_coredump {
	unsigned long flags;
	pid_t pid;
	u32 context;
	u32 seqno;
	u32 head;
	u32 tail;
	struct i915_sched_attr sched_attr;
};

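/*
 * Per-engine capture: the engine register state at the time of the error,
 * a summary of the active context, the VMA snapshots gathered for the
 * offending request and the contents of the execlist ports, chained via
 * @next.
 */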
struct intel_engine_coredump {
	const struct intel_engine_cs *engine;

	bool simulated;
	u32 reset_count;

	/* position of active request inside the ring */
	u32 rq_head, rq_post, rq_tail;

	/* Register state */
	u32 ccid;
	u32 start;
	u32 tail;
	u32 head;
	u32 ctl;
	u32 mode;
	u32 hws;
	u32 ipeir;
	u32 ipehr;
	u32 esr;
	u32 bbstate;
	u32 instpm;
	u32 instps;
	u64 bbaddr;
	u64 acthd;
	u32 fault_reg;
	u64 faddr;
	u32 rc_psmi; /* sleep state */
	struct intel_instdone instdone;

	struct i915_gem_context_coredump {
		char comm[TASK_COMM_LEN];

		u64 total_runtime;
		u32 avg_runtime;

		pid_t pid;
		int active;
		int guilty;
		struct i915_sched_attr sched_attr;
	} context;

	struct i915_vma_coredump *vma;

	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
	unsigned int num_ports;

	struct {
		u32 gfx_mode;
		union {
			u64 pdp[4];
			u32 pp_dir_base;
		};
	} vm_info;

	struct intel_engine_coredump *next;
};

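/*
 * Per-GT capture: the GT-wide register state, fence registers, the list of
 * per-engine dumps and, when available, GuC/HuC firmware details and the
 * GuC log, chained via @next.
 */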
struct intel_gt_coredump {
	const struct intel_gt *_gt;
	bool awake;
	bool simulated;

	struct intel_gt_info info;

	/* Generic register state */
	u32 eir;
	u32 pgtbl_er;
	u32 ier;
	u32 gtier[6], ngtier;
	u32 derrmr;
	u32 forcewake;
	u32 error; /* gen6+ */
	u32 err_int; /* gen7 */
	u32 fault_data0; /* gen8, gen9 */
	u32 fault_data1; /* gen8, gen9 */
	u32 done_reg;
	u32 gac_eco;
	u32 gam_ecochk;
	u32 gab_ctl;
	u32 gfx_mode;
	u32 gtt_cache;
	u32 aux_err; /* gen12 */
	u32 sfc_done[GEN12_SFC_DONE_MAX]; /* gen12 */
	u32 gam_done; /* gen12 */

	u32 nfence;
	u64 fence[I915_MAX_NUM_FENCES];

	struct intel_engine_coredump *engine;

	struct intel_uc_coredump {
		struct intel_uc_fw guc_fw;
		struct intel_uc_fw huc_fw;
		struct i915_vma_coredump *guc_log;
	} *uc;

	struct intel_gt_coredump *next;
};

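/*
 * Top-level dump of a single error capture: timestamps, device and driver
 * information, the chain of per-GT captures, and the scatterlist backing
 * the serialized report once it has been formatted for userspace.
 */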
struct i915_gpu_coredump {
	struct kref ref;
	ktime_t time;
	ktime_t boottime;
	ktime_t uptime;
	unsigned long capture;

	struct drm_i915_private *i915;

	struct intel_gt_coredump *gt;

	char error_msg[128];
	bool simulated;
	bool wakelock;
	bool suspended;
	int iommu;
	u32 reset_count;
	u32 suspend_count;

	struct intel_device_info device_info;
	struct intel_runtime_info runtime_info;
	struct intel_driver_caps driver_caps;
	struct i915_params params;

	struct intel_overlay_error_state *overlay;
	struct intel_display_error_state *display;

	struct scatterlist *sgl, *fit;
};

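/*
 * Per-device error bookkeeping: the stored error dump (protected by @lock)
 * plus the global and per-engine reset counters.
 */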
struct i915_gpu_error {
	/* For reset and error_state handling. */
	spinlock_t lock;
	/* Protected by the above dev->gpu_error.lock. */
	struct i915_gpu_coredump *first_error;

	atomic_t pending_fb_pin;

	/** Number of times the device has been reset (global) */
	atomic_t reset_count;

	/** Number of times an engine has been reset */
	atomic_t reset_engine_count[I915_NUM_ENGINES];
};

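/*
 * Output buffer used by i915_error_printf() while formatting an error dump
 * into a scatterlist chain, one page at a time.
 */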
struct drm_i915_error_state_buf {
	struct drm_i915_private *i915;
	struct scatterlist *sgl, *cur, *end;

	char *buf;
	size_t bytes;
	size_t size;
	loff_t iter;

	int err;
};

#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);

struct i915_gpu_coredump *i915_gpu_coredump(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *i915);

struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);

struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp);

struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp);

struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp);

void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
				   struct intel_engine_capture_vma *capture,
				   struct i915_vma_compress *compress);

struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt);

void i915_vma_capture_finish(struct intel_gt_coredump *gt,
			     struct i915_vma_compress *compress);

void i915_error_state_store(struct i915_gpu_coredump *error);
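
/*
 * A rough sketch of how the capture helpers above compose, assuming a GT
 * dump @gt, an @engine with a hanging request @rq and a parent @error dump
 * have already been set up (error handling omitted; see i915_gpu_error.c
 * for the canonical flow):
 *
 *	struct i915_vma_compress *compress = i915_vma_capture_prepare(gt);
 *	struct intel_engine_coredump *ee =
 *		intel_engine_coredump_alloc(engine, GFP_ATOMIC);
 *	struct intel_engine_capture_vma *vma =
 *		intel_engine_coredump_add_request(ee, rq, GFP_ATOMIC);
 *
 *	intel_engine_coredump_add_vma(ee, vma, compress);
 *	i915_vma_capture_finish(gt, compress);
 *	i915_error_state_store(error);
 */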

static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
{
	kref_get(&gpu->ref);
	return gpu;
}

ssize_t
i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
				 char *buf, loff_t offset, size_t count);

void __i915_gpu_coredump_free(struct kref *kref);
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
	if (gpu)
		kref_put(&gpu->ref, __i915_gpu_coredump_free);
}

struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);

#else

static inline void i915_capture_error_state(struct drm_i915_private *i915)
{
}

static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
{
	return NULL;
}

static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
				  struct i915_request *rq,
				  gfp_t gfp)
{
	return NULL;
}

static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}

static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}

static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}

static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}

static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}

static inline struct i915_gpu_coredump *
i915_first_error_state(struct drm_i915_private *i915)
{
	return ERR_PTR(-ENODEV);
}

static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}

static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}

#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */

#endif /* _I915_GPU_ERROR_H_ */