1  /*
2   * SPDX-License-Identifier: MIT
3   *
4   * Copyright © 2008-2018 Intel Corporation
5   */
6  
7  #ifndef _I915_GPU_ERROR_H_
8  #define _I915_GPU_ERROR_H_
9  
10  #include <linux/atomic.h>
11  #include <linux/kref.h>
12  #include <linux/ktime.h>
13  #include <linux/sched.h>
14  
15  #include <drm/drm_mm.h>
16  
17  #include "display/intel_display_device.h"
18  #include "display/intel_display_params.h"
19  #include "gt/intel_engine.h"
20  #include "gt/intel_engine_types.h"
21  #include "gt/intel_gt_types.h"
22  #include "gt/uc/intel_uc_fw.h"
23  
24  #include "intel_device_info.h"
25  
26  #include "i915_gem.h"
27  #include "i915_gem_gtt.h"
28  #include "i915_params.h"
29  #include "i915_scheduler.h"
30  
/* Opaque types that this header only refers to through pointers. */
struct drm_i915_private;
struct i915_vma_compress;
struct intel_engine_capture_vma;
struct intel_overlay_error_state;
35  
36  struct i915_vma_coredump {
37  	struct i915_vma_coredump *next;
38  
39  	char name[20];
40  
41  	u64 gtt_offset;
42  	u64 gtt_size;
43  	u32 gtt_page_sizes;
44  
45  	int unused;
46  	struct list_head page_list;
47  };
48  
49  struct i915_request_coredump {
50  	unsigned long flags;
51  	pid_t pid;
52  	u32 context;
53  	u32 seqno;
54  	u32 head;
55  	u32 tail;
56  	struct i915_sched_attr sched_attr;
57  };
58  
/* Opaque here: struct intel_engine_coredump only stores a pointer to it. */
struct __guc_capture_parsed_output;
60  
61  struct intel_engine_coredump {
62  	const struct intel_engine_cs *engine;
63  
64  	bool hung;
65  	bool simulated;
66  	u32 reset_count;
67  
68  	/* position of active request inside the ring */
69  	u32 rq_head, rq_post, rq_tail;
70  
71  	/* Register state */
72  	u32 ccid;
73  	u32 start;
74  	u32 tail;
75  	u32 head;
76  	u32 ctl;
77  	u32 mode;
78  	u32 hws;
79  	u32 ipeir;
80  	u32 ipehr;
81  	u32 esr;
82  	u32 bbstate;
83  	u32 instpm;
84  	u32 instps;
85  	u64 bbaddr;
86  	u64 acthd;
87  	u32 fault_reg;
88  	u64 faddr;
89  	u32 rc_psmi; /* sleep state */
90  	u32 nopid;
91  	u32 excc;
92  	u32 cmd_cctl;
93  	u32 cscmdop;
94  	u32 ctx_sr_ctl;
95  	u32 dma_faddr_hi;
96  	u32 dma_faddr_lo;
97  	struct intel_instdone instdone;
98  
99  	/* GuC matched capture-lists info */
100  	struct intel_guc_state_capture *guc_capture;
101  	struct __guc_capture_parsed_output *guc_capture_node;
102  
103  	struct i915_gem_context_coredump {
104  		char comm[TASK_COMM_LEN];
105  
106  		u64 total_runtime;
107  		u64 avg_runtime;
108  
109  		pid_t pid;
110  		int active;
111  		int guilty;
112  		struct i915_sched_attr sched_attr;
113  		u32 hwsp_seqno;
114  	} context;
115  
116  	struct i915_vma_coredump *vma;
117  
118  	struct i915_request_coredump execlist[EXECLIST_MAX_PORTS];
119  	unsigned int num_ports;
120  
121  	struct {
122  		u32 gfx_mode;
123  		union {
124  			u64 pdp[4];
125  			u32 pp_dir_base;
126  		};
127  	} vm_info;
128  
129  	struct intel_engine_coredump *next;
130  };
131  
132  struct intel_ctb_coredump {
133  	u32 raw_head, head;
134  	u32 raw_tail, tail;
135  	u32 raw_status;
136  	u32 desc_offset;
137  	u32 cmds_offset;
138  	u32 size;
139  };
140  
141  struct intel_gt_coredump {
142  	const struct intel_gt *_gt;
143  	bool awake;
144  	bool simulated;
145  
146  	struct intel_gt_info info;
147  
148  	/* Generic register state */
149  	u32 eir;
150  	u32 pgtbl_er;
151  	u32 ier;
152  	u32 gtier[6], ngtier;
153  	u32 forcewake;
154  	u32 error; /* gen6+ */
155  	u32 err_int; /* gen7 */
156  	u32 fault_data0; /* gen8, gen9 */
157  	u32 fault_data1; /* gen8, gen9 */
158  	u32 done_reg;
159  	u32 gac_eco;
160  	u32 gam_ecochk;
161  	u32 gab_ctl;
162  	u32 gfx_mode;
163  	u32 gtt_cache;
164  	u32 aux_err; /* gen12 */
165  	u32 gam_done; /* gen12 */
166  	u32 clock_frequency;
167  	u32 clock_period_ns;
168  
169  	/* Display related */
170  	u32 derrmr;
171  	u32 sfc_done[I915_MAX_SFC]; /* gen12 */
172  
173  	u32 nfence;
174  	u64 fence[I915_MAX_NUM_FENCES];
175  
176  	struct intel_engine_coredump *engine;
177  
178  	struct intel_uc_coredump {
179  		struct intel_uc_fw guc_fw;
180  		struct intel_uc_fw huc_fw;
181  		struct guc_info {
182  			struct intel_ctb_coredump ctb[2];
183  			struct i915_vma_coredump *vma_ctb;
184  			struct i915_vma_coredump *vma_log;
185  			u32 timestamp;
186  			u16 last_fence;
187  			bool is_guc_capture;
188  		} guc;
189  	} *uc;
190  
191  	struct intel_gt_coredump *next;
192  };
193  
194  struct i915_gpu_coredump {
195  	struct kref ref;
196  	ktime_t time;
197  	ktime_t boottime;
198  	ktime_t uptime;
199  	unsigned long capture;
200  
201  	struct drm_i915_private *i915;
202  
203  	struct intel_gt_coredump *gt;
204  
205  	char error_msg[128];
206  	bool simulated;
207  	bool wakelock;
208  	bool suspended;
209  	int iommu;
210  	u32 reset_count;
211  	u32 suspend_count;
212  
213  	struct intel_device_info device_info;
214  	struct intel_runtime_info runtime_info;
215  	struct intel_display_device_info display_device_info;
216  	struct intel_display_runtime_info display_runtime_info;
217  	struct intel_driver_caps driver_caps;
218  	struct i915_params params;
219  	struct intel_display_params display_params;
220  
221  	struct intel_overlay_error_state *overlay;
222  
223  	struct scatterlist *sgl, *fit;
224  };
225  
226  struct i915_gpu_error {
227  	/* For reset and error_state handling. */
228  	spinlock_t lock;
229  	/* Protected by the above dev->gpu_error.lock. */
230  	struct i915_gpu_coredump *first_error;
231  
232  	atomic_t pending_fb_pin;
233  
234  	/** Number of times the device has been reset (global) */
235  	atomic_t reset_count;
236  
237  	/** Number of times an engine has been reset */
238  	atomic_t reset_engine_count[MAX_ENGINE_CLASS];
239  };
240  
241  struct drm_i915_error_state_buf {
242  	struct drm_i915_private *i915;
243  	struct scatterlist *sgl, *cur, *end;
244  
245  	char *buf;
246  	size_t bytes;
247  	size_t size;
248  	loff_t iter;
249  
250  	int err;
251  };
252  
i915_reset_count(struct i915_gpu_error * error)253  static inline u32 i915_reset_count(struct i915_gpu_error *error)
254  {
255  	return atomic_read(&error->reset_count);
256  }
257  
i915_reset_engine_count(struct i915_gpu_error * error,const struct intel_engine_cs * engine)258  static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
259  					  const struct intel_engine_cs *engine)
260  {
261  	return atomic_read(&error->reset_engine_count[engine->class]);
262  }
263  
264  static inline void
i915_increase_reset_engine_count(struct i915_gpu_error * error,const struct intel_engine_cs * engine)265  i915_increase_reset_engine_count(struct i915_gpu_error *error,
266  				 const struct intel_engine_cs *engine)
267  {
268  	atomic_inc(&error->reset_engine_count[engine->class]);
269  }
270  
/*
 * Coredump flag bits.
 * NOTE(review): presumably the values for the u32 dump_flags argument of the
 * capture/alloc functions declared in this header -- confirm against callers.
 */
#define CORE_DUMP_FLAG_NONE           0x0
#define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
273  
274  #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) && IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
275  void intel_klog_error_capture(struct intel_gt *gt,
276  			      intel_engine_mask_t engine_mask);
277  #else
intel_klog_error_capture(struct intel_gt * gt,intel_engine_mask_t engine_mask)278  static inline void intel_klog_error_capture(struct intel_gt *gt,
279  					    intel_engine_mask_t engine_mask)
280  {
281  }
282  #endif
283  
284  #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
285  
286  __printf(2, 3)
287  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
288  
289  void i915_capture_error_state(struct intel_gt *gt,
290  			      intel_engine_mask_t engine_mask, u32 dump_flags);
291  
292  struct i915_gpu_coredump *
293  i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp);
294  
295  struct intel_gt_coredump *
296  intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags);
297  
298  struct intel_engine_coredump *
299  intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags);
300  
301  struct intel_engine_capture_vma *
302  intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
303  				  struct i915_request *rq,
304  				  gfp_t gfp);
305  
306  void intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
307  				   struct intel_engine_capture_vma *capture,
308  				   struct i915_vma_compress *compress);
309  
310  struct i915_vma_compress *
311  i915_vma_capture_prepare(struct intel_gt_coredump *gt);
312  
313  void i915_vma_capture_finish(struct intel_gt_coredump *gt,
314  			     struct i915_vma_compress *compress);
315  
316  void i915_error_state_store(struct i915_gpu_coredump *error);
317  
318  static inline struct i915_gpu_coredump *
i915_gpu_coredump_get(struct i915_gpu_coredump * gpu)319  i915_gpu_coredump_get(struct i915_gpu_coredump *gpu)
320  {
321  	kref_get(&gpu->ref);
322  	return gpu;
323  }
324  
325  ssize_t
326  i915_gpu_coredump_copy_to_buffer(struct i915_gpu_coredump *error,
327  				 char *buf, loff_t offset, size_t count);
328  
329  void __i915_gpu_coredump_free(struct kref *kref);
i915_gpu_coredump_put(struct i915_gpu_coredump * gpu)330  static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
331  {
332  	if (gpu)
333  		kref_put(&gpu->ref, __i915_gpu_coredump_free);
334  }
335  
/* Error-state management; implemented outside this header. */
void i915_reset_error_state(struct drm_i915_private *i915);
void i915_disable_error_state(struct drm_i915_private *i915, int err);

/* debugfs / sysfs hooks for the error state. */
void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
342  
343  #else
344  
345  __printf(2, 3)
346  static inline void
i915_error_printf(struct drm_i915_error_state_buf * e,const char * f,...)347  i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
348  {
349  }
350  
351  static inline void
i915_capture_error_state(struct intel_gt * gt,intel_engine_mask_t engine_mask,u32 dump_flags)352  i915_capture_error_state(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 dump_flags)
353  {
354  }
355  
356  static inline struct i915_gpu_coredump *
i915_gpu_coredump_alloc(struct drm_i915_private * i915,gfp_t gfp)357  i915_gpu_coredump_alloc(struct drm_i915_private *i915, gfp_t gfp)
358  {
359  	return NULL;
360  }
361  
362  static inline struct intel_gt_coredump *
intel_gt_coredump_alloc(struct intel_gt * gt,gfp_t gfp,u32 dump_flags)363  intel_gt_coredump_alloc(struct intel_gt *gt, gfp_t gfp, u32 dump_flags)
364  {
365  	return NULL;
366  }
367  
368  static inline struct intel_engine_coredump *
intel_engine_coredump_alloc(struct intel_engine_cs * engine,gfp_t gfp,u32 dump_flags)369  intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_flags)
370  {
371  	return NULL;
372  }
373  
374  static inline struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump * ee,struct i915_request * rq,gfp_t gfp)375  intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
376  				  struct i915_request *rq,
377  				  gfp_t gfp)
378  {
379  	return NULL;
380  }
381  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: does nothing. */
static inline void
intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
			      struct intel_engine_capture_vma *capture,
			      struct i915_vma_compress *compress)
{
}
388  
/*
 * Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: prepares
 * nothing and always returns NULL.
 */
static inline struct i915_vma_compress *
i915_vma_capture_prepare(struct intel_gt_coredump *gt)
{
	return NULL;
}
394  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: does nothing. */
static inline void
i915_vma_capture_finish(struct intel_gt_coredump *gt,
			struct i915_vma_compress *compress)
{
}
400  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: does nothing. */
static inline void
i915_error_state_store(struct i915_gpu_coredump *error)
{
}
405  
/*
 * Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: there is no
 * reference to drop, so this does nothing.
 */
static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
{
}
409  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: does nothing. */
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
413  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: does nothing. */
static inline void i915_disable_error_state(struct drm_i915_private *i915,
					    int err)
{
}
418  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: registers nothing. */
static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
{
}
422  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: sets up nothing. */
static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
{
}
426  
/* Stub used when CONFIG_DRM_I915_CAPTURE_ERROR is disabled: tears down nothing. */
static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
{
}
430  
431  #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
432  
433  #endif /* _I915_GPU_ERROR_H_ */
434