1d897a111SMichal Wajdeczko /* 2d897a111SMichal Wajdeczko * SPDX-License-Identifier: MIT 3d897a111SMichal Wajdeczko * 4e52e4a31SMauro Carvalho Chehab * Copyright © 2008-2018 Intel Corporation 5d897a111SMichal Wajdeczko */ 6d897a111SMichal Wajdeczko 7d897a111SMichal Wajdeczko #ifndef _I915_GPU_ERROR_H_ 8d897a111SMichal Wajdeczko #define _I915_GPU_ERROR_H_ 9d897a111SMichal Wajdeczko 10cb823ed9SChris Wilson #include <linux/atomic.h> 11d897a111SMichal Wajdeczko #include <linux/kref.h> 12d897a111SMichal Wajdeczko #include <linux/ktime.h> 13d897a111SMichal Wajdeczko #include <linux/sched.h> 14d897a111SMichal Wajdeczko 15d897a111SMichal Wajdeczko #include <drm/drm_mm.h> 16d897a111SMichal Wajdeczko 17112ed2d3SChris Wilson #include "gt/intel_engine.h" 18792592e7SDaniele Ceraolo Spurio #include "gt/intel_gt_types.h" 190f261b24SDaniele Ceraolo Spurio #include "gt/uc/intel_uc_fw.h" 20112ed2d3SChris Wilson 21d897a111SMichal Wajdeczko #include "intel_device_info.h" 22d897a111SMichal Wajdeczko 23d897a111SMichal Wajdeczko #include "i915_gem.h" 24d897a111SMichal Wajdeczko #include "i915_gem_gtt.h" 25d897a111SMichal Wajdeczko #include "i915_params.h" 26b7268c5eSChris Wilson #include "i915_scheduler.h" 27d897a111SMichal Wajdeczko 28d897a111SMichal Wajdeczko struct drm_i915_private; 29742379c0SChris Wilson struct i915_vma_compress; 30742379c0SChris Wilson struct intel_engine_capture_vma; 31d897a111SMichal Wajdeczko struct intel_overlay_error_state; 32d897a111SMichal Wajdeczko 33742379c0SChris Wilson struct i915_vma_coredump { 34742379c0SChris Wilson struct i915_vma_coredump *next; 35d897a111SMichal Wajdeczko 36742379c0SChris Wilson char name[20]; 37d897a111SMichal Wajdeczko 38742379c0SChris Wilson u64 gtt_offset; 39742379c0SChris Wilson u64 gtt_size; 40742379c0SChris Wilson u32 gtt_page_sizes; 41d897a111SMichal Wajdeczko 42742379c0SChris Wilson int unused; 43e45b98baSThomas Hellström struct list_head page_list; 44742379c0SChris Wilson }; 45d897a111SMichal Wajdeczko 46742379c0SChris Wilson struct i915_request_coredump { 47742379c0SChris Wilson unsigned long flags; 48742379c0SChris Wilson pid_t pid; 49742379c0SChris Wilson u32 context; 50742379c0SChris Wilson u32 seqno; 51742379c0SChris Wilson u32 head; 52742379c0SChris Wilson u32 tail; 53742379c0SChris Wilson struct i915_sched_attr sched_attr; 54742379c0SChris Wilson }; 55d897a111SMichal Wajdeczko 56a6f0f9cfSAlan Previn struct __guc_capture_parsed_output; 57a6f0f9cfSAlan Previn 58742379c0SChris Wilson struct intel_engine_coredump { 59c990b4c3SChris Wilson const struct intel_engine_cs *engine; 60c990b4c3SChris Wilson 61bda30024STvrtko Ursulin bool hung; 62742379c0SChris Wilson bool simulated; 63d897a111SMichal Wajdeczko u32 reset_count; 64d897a111SMichal Wajdeczko 65d897a111SMichal Wajdeczko /* position of active request inside the ring */ 66d897a111SMichal Wajdeczko u32 rq_head, rq_post, rq_tail; 67d897a111SMichal Wajdeczko 68d897a111SMichal Wajdeczko /* Register state */ 69742379c0SChris Wilson u32 ccid; 70d897a111SMichal Wajdeczko u32 start; 71d897a111SMichal Wajdeczko u32 tail; 72d897a111SMichal Wajdeczko u32 head; 73d897a111SMichal Wajdeczko u32 ctl; 74d897a111SMichal Wajdeczko u32 mode; 75d897a111SMichal Wajdeczko u32 hws; 76d897a111SMichal Wajdeczko u32 ipeir; 77d897a111SMichal Wajdeczko u32 ipehr; 7870a76a9bSChris Wilson u32 esr; 79d897a111SMichal Wajdeczko u32 bbstate; 80d897a111SMichal Wajdeczko u32 instpm; 81d897a111SMichal Wajdeczko u32 instps; 82d897a111SMichal Wajdeczko u64 bbaddr; 83d897a111SMichal Wajdeczko u64 acthd; 84d897a111SMichal Wajdeczko u32 fault_reg; 85d897a111SMichal Wajdeczko u64 faddr; 86d897a111SMichal Wajdeczko u32 rc_psmi; /* sleep state */ 87d897a111SMichal Wajdeczko struct intel_instdone instdone; 88d897a111SMichal Wajdeczko 89a6f0f9cfSAlan Previn /* GuC matched capture-lists info */ 90a6f0f9cfSAlan Previn struct intel_guc_state_capture *capture; 91a6f0f9cfSAlan Previn struct __guc_capture_parsed_output *guc_capture_node; 92a6f0f9cfSAlan Previn 93742379c0SChris Wilson struct i915_gem_context_coredump { 94d897a111SMichal Wajdeczko char comm[TASK_COMM_LEN]; 951883a0a4STvrtko Ursulin 961883a0a4STvrtko Ursulin u64 total_runtime; 97*bb6287cbSTvrtko Ursulin u64 avg_runtime; 981883a0a4STvrtko Ursulin 99d897a111SMichal Wajdeczko pid_t pid; 100d897a111SMichal Wajdeczko int active; 101d897a111SMichal Wajdeczko int guilty; 102b7268c5eSChris Wilson struct i915_sched_attr sched_attr; 103d897a111SMichal Wajdeczko } context; 104d897a111SMichal Wajdeczko 105742379c0SChris Wilson struct i915_vma_coredump *vma; 106d897a111SMichal Wajdeczko 1071a8585bdSChris Wilson struct i915_request_coredump execlist[EXECLIST_MAX_PORTS]; 108d897a111SMichal Wajdeczko unsigned int num_ports; 109d897a111SMichal Wajdeczko 110d897a111SMichal Wajdeczko struct { 111d897a111SMichal Wajdeczko u32 gfx_mode; 112d897a111SMichal Wajdeczko union { 113d897a111SMichal Wajdeczko u64 pdp[4]; 114d897a111SMichal Wajdeczko u32 pp_dir_base; 115d897a111SMichal Wajdeczko }; 116d897a111SMichal Wajdeczko } vm_info; 117c990b4c3SChris Wilson 118742379c0SChris Wilson struct intel_engine_coredump *next; 119742379c0SChris Wilson }; 120742379c0SChris Wilson 121742379c0SChris Wilson struct intel_gt_coredump { 122742379c0SChris Wilson const struct intel_gt *_gt; 123742379c0SChris Wilson bool awake; 124742379c0SChris Wilson bool simulated; 125742379c0SChris Wilson 126792592e7SDaniele Ceraolo Spurio struct intel_gt_info info; 127792592e7SDaniele Ceraolo Spurio 128742379c0SChris Wilson /* Generic register state */ 129742379c0SChris Wilson u32 eir; 130742379c0SChris Wilson u32 pgtbl_er; 131742379c0SChris Wilson u32 ier; 132742379c0SChris Wilson u32 gtier[6], ngtier; 133742379c0SChris Wilson u32 forcewake; 134742379c0SChris Wilson u32 error; /* gen6+ */ 135742379c0SChris Wilson u32 err_int; /* gen7 */ 136742379c0SChris Wilson u32 fault_data0; /* gen8, gen9 */ 137742379c0SChris Wilson u32 fault_data1; /* gen8, gen9 */ 138742379c0SChris Wilson u32 done_reg; 139742379c0SChris Wilson u32 gac_eco; 140742379c0SChris Wilson u32 gam_ecochk; 141742379c0SChris Wilson u32 gab_ctl; 142742379c0SChris Wilson u32 gfx_mode; 143742379c0SChris Wilson u32 gtt_cache; 144742379c0SChris Wilson u32 aux_err; /* gen12 */ 145742379c0SChris Wilson u32 gam_done; /* gen12 */ 146742379c0SChris Wilson 147a6f0f9cfSAlan Previn /* Display related */ 148a6f0f9cfSAlan Previn u32 derrmr; 149a6f0f9cfSAlan Previn u32 sfc_done[I915_MAX_SFC]; /* gen12 */ 150a6f0f9cfSAlan Previn 151742379c0SChris Wilson u32 nfence; 152742379c0SChris Wilson u64 fence[I915_MAX_NUM_FENCES]; 153742379c0SChris Wilson 154742379c0SChris Wilson struct intel_engine_coredump *engine; 155742379c0SChris Wilson 156742379c0SChris Wilson struct intel_uc_coredump { 157742379c0SChris Wilson struct intel_uc_fw guc_fw; 158742379c0SChris Wilson struct intel_uc_fw huc_fw; 159742379c0SChris Wilson struct i915_vma_coredump *guc_log; 160a6f0f9cfSAlan Previn bool is_guc_capture; 161742379c0SChris Wilson } *uc; 162742379c0SChris Wilson 163742379c0SChris Wilson struct intel_gt_coredump *next; 164742379c0SChris Wilson }; 165742379c0SChris Wilson 166742379c0SChris Wilson struct i915_gpu_coredump { 167742379c0SChris Wilson struct kref ref; 168742379c0SChris Wilson ktime_t time; 169742379c0SChris Wilson ktime_t boottime; 170742379c0SChris Wilson ktime_t uptime; 171742379c0SChris Wilson unsigned long capture; 172742379c0SChris Wilson 173742379c0SChris Wilson struct drm_i915_private *i915; 174742379c0SChris Wilson 175742379c0SChris Wilson struct intel_gt_coredump *gt; 176742379c0SChris Wilson 177742379c0SChris Wilson char error_msg[128]; 178742379c0SChris Wilson bool simulated; 179742379c0SChris Wilson bool wakelock; 180742379c0SChris Wilson bool suspended; 181742379c0SChris Wilson int iommu; 182742379c0SChris Wilson u32 reset_count; 183742379c0SChris Wilson u32 suspend_count; 184742379c0SChris Wilson 185742379c0SChris Wilson struct intel_device_info device_info; 186742379c0SChris Wilson struct intel_runtime_info runtime_info; 187742379c0SChris Wilson struct intel_driver_caps driver_caps; 188742379c0SChris Wilson struct i915_params params; 189742379c0SChris Wilson 190742379c0SChris Wilson struct intel_overlay_error_state *overlay; 191d897a111SMichal Wajdeczko 1920e39037bSChris Wilson struct scatterlist *sgl, *fit; 193d897a111SMichal Wajdeczko }; 194d897a111SMichal Wajdeczko 195d897a111SMichal Wajdeczko struct i915_gpu_error { 196d897a111SMichal Wajdeczko /* For reset and error_state handling. */ 197d897a111SMichal Wajdeczko spinlock_t lock; 198d897a111SMichal Wajdeczko /* Protected by the above dev->gpu_error.lock. */ 199742379c0SChris Wilson struct i915_gpu_coredump *first_error; 200d897a111SMichal Wajdeczko 201d897a111SMichal Wajdeczko atomic_t pending_fb_pin; 202d897a111SMichal Wajdeczko 2032caffbf1SChris Wilson /** Number of times the device has been reset (global) */ 204cb823ed9SChris Wilson atomic_t reset_count; 2052caffbf1SChris Wilson 206d897a111SMichal Wajdeczko /** Number of times an engine has been reset */ 207cb823ed9SChris Wilson atomic_t reset_engine_count[I915_NUM_ENGINES]; 208d897a111SMichal Wajdeczko }; 209d897a111SMichal Wajdeczko 210d897a111SMichal Wajdeczko struct drm_i915_error_state_buf { 211d897a111SMichal Wajdeczko struct drm_i915_private *i915; 2120e39037bSChris Wilson struct scatterlist *sgl, *cur, *end; 2130e39037bSChris Wilson 2140e39037bSChris Wilson char *buf; 2150e39037bSChris Wilson size_t bytes; 2160e39037bSChris Wilson size_t size; 2170e39037bSChris Wilson loff_t iter; 2180e39037bSChris Wilson 219d897a111SMichal Wajdeczko int err; 220d897a111SMichal Wajdeczko }; 221d897a111SMichal Wajdeczko 222f9bf77dfSJani Nikula static inline u32 i915_reset_count(struct i915_gpu_error *error) 223f9bf77dfSJani Nikula { 224f9bf77dfSJani Nikula return atomic_read(&error->reset_count); 225f9bf77dfSJani Nikula } 226f9bf77dfSJani Nikula 227f9bf77dfSJani Nikula static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, 228f9bf77dfSJani Nikula const struct intel_engine_cs *engine) 229f9bf77dfSJani Nikula { 230f9bf77dfSJani Nikula return atomic_read(&error->reset_engine_count[engine->uabi_class]); 231f9bf77dfSJani Nikula } 232f9bf77dfSJani Nikula 233a6f0f9cfSAlan Previn #define CORE_DUMP_FLAG_NONE 0x0 234a6f0f9cfSAlan Previn #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0) 235a6f0f9cfSAlan Previn 236d897a111SMichal Wajdeczko #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) 237d897a111SMichal Wajdeczko 238d897a111SMichal Wajdeczko __printf(2, 3) 239d897a111SMichal Wajdeczko void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); 240a0f1f7b4SAlan Previn void intel_gpu_error_print_vma(struct drm_i915_error_state_buf *m, 241a0f1f7b4SAlan Previn const struct intel_engine_cs *engine, 242a0f1f7b4SAlan Previn const struct i915_vma_coredump *vma); 243a0f1f7b4SAlan Previn struct i915_vma_coredump * 244a0f1f7b4SAlan Previn intel_gpu_error_find_batch(const struct intel_engine_coredump *ee); 245d897a111SMichal Wajdeczko 246bda30024STvrtko Ursulin struct i915_gpu_coredump *i915_gpu_coredump(struct intel_gt *gt, 247a6f0f9cfSAlan Previn intel_engine_mask_t engine_mask, u32 dump_flags); 248bda30024STvrtko Ursulin void i915_capture_error_state(struct intel_gt *gt, 249a6f0f9cfSAlan Previn intel_engine_mask_t engine_mask, u32 dump_flags); 250d897a111SMichal Wajdeczko 251742379c0SChris Wilson struct i915_gpu_coredump * 252742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp); 253742379c0SChris Wilson 254742379c0SChris Wilson struct intel_gt_coredump * 255a6f0f9cfSAlan Previn intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags); 256742379c0SChris Wilson 257742379c0SChris Wilson struct intel_engine_coredump * 258a6f0f9cfSAlan Previn intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags); 259742379c0SChris Wilson 260742379c0SChris Wilson struct intel_engine_capture_vma * 261742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 262742379c0SChris Wilson struct i915_request *rq, 263742379c0SChris Wilson gfp_t gfp); 264742379c0SChris Wilson 265742379c0SChris Wilson void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 266742379c0SChris Wilson struct intel_engine_capture_vma *capture, 267742379c0SChris Wilson struct i915_vma_compress *compress); 268742379c0SChris Wilson 269742379c0SChris Wilson struct i915_vma_compress * 270742379c0SChris Wilson i915_vma_capture_prepare(struct intel_gt_coredump *gt); 271742379c0SChris Wilson 272742379c0SChris Wilson void i915_vma_capture_finish(struct intel_gt_coredump *gt, 273742379c0SChris Wilson struct i915_vma_compress *compress); 274742379c0SChris Wilson 275742379c0SChris Wilson void i915_error_state_store(struct i915_gpu_coredump *error); 276742379c0SChris Wilson 277742379c0SChris Wilson static inline struct i915_gpu_coredump * 278742379c0SChris Wilson i915_gpu_coredump_get(struct i915_gpu_coredump *gpu) 279d897a111SMichal Wajdeczko { 280d897a111SMichal Wajdeczko kref_get(&gpu->ref); 281d897a111SMichal Wajdeczko return gpu; 282d897a111SMichal Wajdeczko } 283d897a111SMichal Wajdeczko 284742379c0SChris Wilson ssize_t 285742379c0SChris Wilson i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error, 2860e39037bSChris Wilson char *buf, loff_t offset, size_t count); 2870e39037bSChris Wilson 288742379c0SChris Wilson void __i915_gpu_coredump_free(struct kref *kref); 289742379c0SChris Wilson static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 290d897a111SMichal Wajdeczko { 291d897a111SMichal Wajdeczko if (gpu) 292742379c0SChris Wilson kref_put(&gpu->ref, __i915_gpu_coredump_free); 293d897a111SMichal Wajdeczko } 294d897a111SMichal Wajdeczko 295742379c0SChris Wilson struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); 296d897a111SMichal Wajdeczko void i915_reset_error_state(struct drm_i915_private *i915); 297fb6f0b64SChris Wilson void i915_disable_error_state(struct drm_i915_private *i915, int err); 298d897a111SMichal Wajdeczko 299d897a111SMichal Wajdeczko #else 300d897a111SMichal Wajdeczko 301bda30024STvrtko Ursulin static inline void 302a6f0f9cfSAlan Previn i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags) 303d897a111SMichal Wajdeczko { 304d897a111SMichal Wajdeczko } 305d897a111SMichal Wajdeczko 306742379c0SChris Wilson static inline struct i915_gpu_coredump * 307742379c0SChris Wilson i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp) 308742379c0SChris Wilson { 309742379c0SChris Wilson return NULL; 310742379c0SChris Wilson } 311742379c0SChris Wilson 312742379c0SChris Wilson static inline struct intel_gt_coredump * 313a6f0f9cfSAlan Previn intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags) 314742379c0SChris Wilson { 315742379c0SChris Wilson return NULL; 316742379c0SChris Wilson } 317742379c0SChris Wilson 318742379c0SChris Wilson static inline struct intel_engine_coredump * 319a6f0f9cfSAlan Previn intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags) 320742379c0SChris Wilson { 321742379c0SChris Wilson return NULL; 322742379c0SChris Wilson } 323742379c0SChris Wilson 324742379c0SChris Wilson static inline struct intel_engine_capture_vma * 325742379c0SChris Wilson intel_engine_coredump_add_request(struct intel_engine_coredump *ee, 326742379c0SChris Wilson struct i915_request *rq, 327742379c0SChris Wilson gfp_t gfp) 328742379c0SChris Wilson { 329742379c0SChris Wilson return NULL; 330742379c0SChris Wilson } 331742379c0SChris Wilson 332742379c0SChris Wilson static inline void 333742379c0SChris Wilson intel_engine_coredump_add_vma(struct intel_engine_coredump *ee, 334742379c0SChris Wilson struct intel_engine_capture_vma *capture, 335742379c0SChris Wilson struct i915_vma_compress *compress) 336742379c0SChris Wilson { 337742379c0SChris Wilson } 338742379c0SChris Wilson 339742379c0SChris Wilson static inline struct i915_vma_compress * 340d713e3abSChris Wilson i915_vma_capture_prepare(struct intel_gt_coredump *gt) 341742379c0SChris Wilson { 342742379c0SChris Wilson return NULL; 343742379c0SChris Wilson } 344742379c0SChris Wilson 34559be9b9cSZhang Xiaoxu static inline void 34659be9b9cSZhang Xiaoxu i915_vma_capture_finish(struct intel_gt_coredump *gt, 347d713e3abSChris Wilson struct i915_vma_compress *compress) 348742379c0SChris Wilson { 349742379c0SChris Wilson } 350742379c0SChris Wilson 351742379c0SChris Wilson static inline void 35204062c58SZhang Xiaoxu i915_error_state_store(struct i915_gpu_coredump *error) 353742379c0SChris Wilson { 354742379c0SChris Wilson } 355742379c0SChris Wilson 3567e36505dSChris Wilson static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) 3577e36505dSChris Wilson { 3587e36505dSChris Wilson } 3597e36505dSChris Wilson 360742379c0SChris Wilson static inline struct i915_gpu_coredump * 361d897a111SMichal Wajdeczko i915_first_error_state(struct drm_i915_private *i915) 362d897a111SMichal Wajdeczko { 363fb6f0b64SChris Wilson return ERR_PTR(-ENODEV); 364d897a111SMichal Wajdeczko } 365d897a111SMichal Wajdeczko 366d897a111SMichal Wajdeczko static inline void i915_reset_error_state(struct drm_i915_private *i915) 367d897a111SMichal Wajdeczko { 368d897a111SMichal Wajdeczko } 369d897a111SMichal Wajdeczko 370fb6f0b64SChris Wilson static inline void i915_disable_error_state(struct drm_i915_private *i915, 371fb6f0b64SChris Wilson int err) 372fb6f0b64SChris Wilson { 373fb6f0b64SChris Wilson } 374fb6f0b64SChris Wilson 375d897a111SMichal Wajdeczko #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ 376d897a111SMichal Wajdeczko 377d897a111SMichal Wajdeczko #endif /* _I915_GPU_ERROR_H_ */ 378