1  // SPDX-License-Identifier: MIT
2  /*
3   * Copyright © 2016 Intel Corporation
4   */
5  
6  #include <linux/string_helpers.h>
7  
8  #include <drm/drm_print.h>
9  
10  #include "gem/i915_gem_context.h"
11  #include "gem/i915_gem_internal.h"
12  #include "gt/intel_gt_print.h"
13  #include "gt/intel_gt_regs.h"
14  
15  #include "i915_cmd_parser.h"
16  #include "i915_drv.h"
17  #include "i915_irq.h"
18  #include "i915_reg.h"
19  #include "intel_breadcrumbs.h"
20  #include "intel_context.h"
21  #include "intel_engine.h"
22  #include "intel_engine_pm.h"
23  #include "intel_engine_regs.h"
24  #include "intel_engine_user.h"
25  #include "intel_execlists_submission.h"
26  #include "intel_gt.h"
27  #include "intel_gt_mcr.h"
28  #include "intel_gt_pm.h"
29  #include "intel_gt_requests.h"
30  #include "intel_lrc.h"
31  #include "intel_lrc_reg.h"
32  #include "intel_reset.h"
33  #include "intel_ring.h"
34  #include "uc/intel_guc_submission.h"
35  
/* Haswell does have the CXT_SIZE register, but it does not appear to be
 * valid. The docs explain, in dwords, what is in the context object. The full
 * size is 70720 bytes; however, the power context and execlist context will
 * never be saved (the power context is stored elsewhere, and execlists don't
 * work on HSW) - so the final size, including the extra state required for
 * the Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
43  #define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
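
/*
 * Sanity check on the arithmetic above: 66944 bytes over 4096-byte pages is
 * ~16.34 pages, i.e. DIV_ROUND_UP(66944, PAGE_SIZE) == 17 on a 4K-page
 * system, hence the 17 pages reserved here.
 */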
44  
45  #define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
46  #define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
47  #define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
48  #define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)
49  
50  #define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
51  
52  #define MAX_MMIO_BASES 3
53  struct engine_info {
54  	u8 class;
55  	u8 instance;
56  	/* mmio bases table *must* be sorted in reverse graphics_ver order */
57  	struct engine_mmio_base {
58  		u32 graphics_ver : 8;
59  		u32 base : 24;
60  	} mmio_bases[MAX_MMIO_BASES];
61  };
62  
63  static const struct engine_info intel_engines[] = {
64  	[RCS0] = {
65  		.class = RENDER_CLASS,
66  		.instance = 0,
67  		.mmio_bases = {
68  			{ .graphics_ver = 1, .base = RENDER_RING_BASE }
69  		},
70  	},
71  	[BCS0] = {
72  		.class = COPY_ENGINE_CLASS,
73  		.instance = 0,
74  		.mmio_bases = {
75  			{ .graphics_ver = 6, .base = BLT_RING_BASE }
76  		},
77  	},
78  	[BCS1] = {
79  		.class = COPY_ENGINE_CLASS,
80  		.instance = 1,
81  		.mmio_bases = {
82  			{ .graphics_ver = 12, .base = XEHPC_BCS1_RING_BASE }
83  		},
84  	},
85  	[BCS2] = {
86  		.class = COPY_ENGINE_CLASS,
87  		.instance = 2,
88  		.mmio_bases = {
89  			{ .graphics_ver = 12, .base = XEHPC_BCS2_RING_BASE }
90  		},
91  	},
92  	[BCS3] = {
93  		.class = COPY_ENGINE_CLASS,
94  		.instance = 3,
95  		.mmio_bases = {
96  			{ .graphics_ver = 12, .base = XEHPC_BCS3_RING_BASE }
97  		},
98  	},
99  	[BCS4] = {
100  		.class = COPY_ENGINE_CLASS,
101  		.instance = 4,
102  		.mmio_bases = {
103  			{ .graphics_ver = 12, .base = XEHPC_BCS4_RING_BASE }
104  		},
105  	},
106  	[BCS5] = {
107  		.class = COPY_ENGINE_CLASS,
108  		.instance = 5,
109  		.mmio_bases = {
110  			{ .graphics_ver = 12, .base = XEHPC_BCS5_RING_BASE }
111  		},
112  	},
113  	[BCS6] = {
114  		.class = COPY_ENGINE_CLASS,
115  		.instance = 6,
116  		.mmio_bases = {
117  			{ .graphics_ver = 12, .base = XEHPC_BCS6_RING_BASE }
118  		},
119  	},
120  	[BCS7] = {
121  		.class = COPY_ENGINE_CLASS,
122  		.instance = 7,
123  		.mmio_bases = {
124  			{ .graphics_ver = 12, .base = XEHPC_BCS7_RING_BASE }
125  		},
126  	},
127  	[BCS8] = {
128  		.class = COPY_ENGINE_CLASS,
129  		.instance = 8,
130  		.mmio_bases = {
131  			{ .graphics_ver = 12, .base = XEHPC_BCS8_RING_BASE }
132  		},
133  	},
134  	[VCS0] = {
135  		.class = VIDEO_DECODE_CLASS,
136  		.instance = 0,
137  		.mmio_bases = {
138  			{ .graphics_ver = 11, .base = GEN11_BSD_RING_BASE },
139  			{ .graphics_ver = 6, .base = GEN6_BSD_RING_BASE },
140  			{ .graphics_ver = 4, .base = BSD_RING_BASE }
141  		},
142  	},
143  	[VCS1] = {
144  		.class = VIDEO_DECODE_CLASS,
145  		.instance = 1,
146  		.mmio_bases = {
147  			{ .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE },
148  			{ .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE }
149  		},
150  	},
151  	[VCS2] = {
152  		.class = VIDEO_DECODE_CLASS,
153  		.instance = 2,
154  		.mmio_bases = {
155  			{ .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE }
156  		},
157  	},
158  	[VCS3] = {
159  		.class = VIDEO_DECODE_CLASS,
160  		.instance = 3,
161  		.mmio_bases = {
162  			{ .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE }
163  		},
164  	},
165  	[VCS4] = {
166  		.class = VIDEO_DECODE_CLASS,
167  		.instance = 4,
168  		.mmio_bases = {
169  			{ .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE }
170  		},
171  	},
172  	[VCS5] = {
173  		.class = VIDEO_DECODE_CLASS,
174  		.instance = 5,
175  		.mmio_bases = {
176  			{ .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE }
177  		},
178  	},
179  	[VCS6] = {
180  		.class = VIDEO_DECODE_CLASS,
181  		.instance = 6,
182  		.mmio_bases = {
183  			{ .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE }
184  		},
185  	},
186  	[VCS7] = {
187  		.class = VIDEO_DECODE_CLASS,
188  		.instance = 7,
189  		.mmio_bases = {
190  			{ .graphics_ver = 12, .base = XEHP_BSD8_RING_BASE }
191  		},
192  	},
193  	[VECS0] = {
194  		.class = VIDEO_ENHANCEMENT_CLASS,
195  		.instance = 0,
196  		.mmio_bases = {
197  			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
198  			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
199  		},
200  	},
201  	[VECS1] = {
202  		.class = VIDEO_ENHANCEMENT_CLASS,
203  		.instance = 1,
204  		.mmio_bases = {
205  			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
206  		},
207  	},
208  	[VECS2] = {
209  		.class = VIDEO_ENHANCEMENT_CLASS,
210  		.instance = 2,
211  		.mmio_bases = {
212  			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
213  		},
214  	},
215  	[VECS3] = {
216  		.class = VIDEO_ENHANCEMENT_CLASS,
217  		.instance = 3,
218  		.mmio_bases = {
219  			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
220  		},
221  	},
222  	[CCS0] = {
223  		.class = COMPUTE_CLASS,
224  		.instance = 0,
225  		.mmio_bases = {
226  			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
227  		}
228  	},
229  	[CCS1] = {
230  		.class = COMPUTE_CLASS,
231  		.instance = 1,
232  		.mmio_bases = {
233  			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
234  		}
235  	},
236  	[CCS2] = {
237  		.class = COMPUTE_CLASS,
238  		.instance = 2,
239  		.mmio_bases = {
240  			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
241  		}
242  	},
243  	[CCS3] = {
244  		.class = COMPUTE_CLASS,
245  		.instance = 3,
246  		.mmio_bases = {
247  			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
248  		}
249  	},
250  	[GSC0] = {
251  		.class = OTHER_CLASS,
252  		.instance = OTHER_GSC_INSTANCE,
253  		.mmio_bases = {
254  			{ .graphics_ver = 12, .base = MTL_GSC_RING_BASE }
255  		}
256  	},
257  };
258  
259  /**
260   * intel_engine_context_size() - return the size of the context for an engine
261   * @gt: the gt
262   * @class: engine class
263   *
264   * Each engine class may require a different amount of space for a context
265   * image.
266   *
267   * Return: size (in bytes) of an engine class specific context image
268   *
269   * Note: this size includes the HWSP, which is part of the context image
270   * in LRC mode, but does not include the "shared data page" used with
271   * GuC submission. The caller should account for this if using the GuC.
272   */
u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
274  {
275  	struct intel_uncore *uncore = gt->uncore;
276  	u32 cxt_size;
277  
278  	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
279  
280  	switch (class) {
281  	case COMPUTE_CLASS:
282  		fallthrough;
283  	case RENDER_CLASS:
284  		switch (GRAPHICS_VER(gt->i915)) {
285  		default:
286  			MISSING_CASE(GRAPHICS_VER(gt->i915));
287  			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
288  		case 12:
289  		case 11:
290  			return GEN11_LR_CONTEXT_RENDER_SIZE;
291  		case 9:
292  			return GEN9_LR_CONTEXT_RENDER_SIZE;
293  		case 8:
294  			return GEN8_LR_CONTEXT_RENDER_SIZE;
295  		case 7:
296  			if (IS_HASWELL(gt->i915))
297  				return HSW_CXT_TOTAL_SIZE;
298  
299  			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
300  			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
301  					PAGE_SIZE);
302  		case 6:
303  			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
304  			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
305  					PAGE_SIZE);
306  		case 5:
307  		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
318  			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
319  			gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
320  			       GRAPHICS_VER(gt->i915), cxt_size * 64,
321  			       cxt_size - 1);
322  			return round_up(cxt_size * 64, PAGE_SIZE);
323  		case 3:
324  		case 2:
325  		/* For the special day when i810 gets merged. */
326  		case 1:
327  			return 0;
328  		}
329  		break;
330  	default:
331  		MISSING_CASE(class);
332  		fallthrough;
333  	case VIDEO_DECODE_CLASS:
334  	case VIDEO_ENHANCEMENT_CLASS:
335  	case COPY_ENGINE_CLASS:
336  	case OTHER_CLASS:
337  		if (GRAPHICS_VER(gt->i915) < 8)
338  			return 0;
339  		return GEN8_LR_CONTEXT_OTHER_SIZE;
340  	}
341  }
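
/*
 * Illustrative sketch only (not a call site in this file): a caller needing a
 * backing object sized for a context image could do something like
 *
 *	size = intel_engine_context_size(gt, engine->class);
 *	obj = i915_gem_object_create_internal(gt->i915, size);
 *
 * where i915_gem_object_create_internal() is just one example allocator, not
 * a requirement.
 */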
342  
static u32 __engine_mmio_base(struct drm_i915_private *i915,
344  			      const struct engine_mmio_base *bases)
345  {
346  	int i;
347  
348  	for (i = 0; i < MAX_MMIO_BASES; i++)
349  		if (GRAPHICS_VER(i915) >= bases[i].graphics_ver)
350  			break;
351  
352  	GEM_BUG_ON(i == MAX_MMIO_BASES);
353  	GEM_BUG_ON(!bases[i].base);
354  
355  	return bases[i].base;
356  }
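
/*
 * Worked example of the reverse-sorted walk above: for VCS0 on a graphics
 * version 9 platform, the { 11, 6, 4 } entries are scanned in order and the
 * first entry whose graphics_ver does not exceed 9 wins, so the helper
 * returns GEN6_BSD_RING_BASE.
 */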
357  
static void __sprint_engine_name(struct intel_engine_cs *engine)
359  {
360  	/*
361  	 * Before we know what the uABI name for this engine will be,
362  	 * we still would like to keep track of this engine in the debug logs.
363  	 * We throw in a ' here as a reminder that this isn't its final name.
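	 * For example, the render engine is logged as "rcs'0" until it is
	 * registered with its final uABI name (typically "rcs0").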
364  	 */
365  	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
366  			     intel_engine_class_repr(engine->class),
367  			     engine->instance) >= sizeof(engine->name));
368  }
369  
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
371  {
372  	/*
373  	 * Though they added more rings on g4x/ilk, they did not add
374  	 * per-engine HWSTAM until gen6.
375  	 */
376  	if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS)
377  		return;
378  
379  	if (GRAPHICS_VER(engine->i915) >= 3)
380  		ENGINE_WRITE(engine, RING_HWSTAM, mask);
381  	else
382  		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
383  }
384  
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
386  {
387  	/* Mask off all writes into the unknown HWSP */
388  	intel_engine_set_hwsp_writemask(engine, ~0u);
389  }
390  
static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
392  {
393  	GEM_DEBUG_WARN_ON(iir);
394  }
395  
static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
397  {
398  	u32 reset_domain;
399  
400  	if (ver >= 11) {
401  		static const u32 engine_reset_domains[] = {
402  			[RCS0]  = GEN11_GRDOM_RENDER,
403  			[BCS0]  = GEN11_GRDOM_BLT,
404  			[BCS1]  = XEHPC_GRDOM_BLT1,
405  			[BCS2]  = XEHPC_GRDOM_BLT2,
406  			[BCS3]  = XEHPC_GRDOM_BLT3,
407  			[BCS4]  = XEHPC_GRDOM_BLT4,
408  			[BCS5]  = XEHPC_GRDOM_BLT5,
409  			[BCS6]  = XEHPC_GRDOM_BLT6,
410  			[BCS7]  = XEHPC_GRDOM_BLT7,
411  			[BCS8]  = XEHPC_GRDOM_BLT8,
412  			[VCS0]  = GEN11_GRDOM_MEDIA,
413  			[VCS1]  = GEN11_GRDOM_MEDIA2,
414  			[VCS2]  = GEN11_GRDOM_MEDIA3,
415  			[VCS3]  = GEN11_GRDOM_MEDIA4,
416  			[VCS4]  = GEN11_GRDOM_MEDIA5,
417  			[VCS5]  = GEN11_GRDOM_MEDIA6,
418  			[VCS6]  = GEN11_GRDOM_MEDIA7,
419  			[VCS7]  = GEN11_GRDOM_MEDIA8,
420  			[VECS0] = GEN11_GRDOM_VECS,
421  			[VECS1] = GEN11_GRDOM_VECS2,
422  			[VECS2] = GEN11_GRDOM_VECS3,
423  			[VECS3] = GEN11_GRDOM_VECS4,
424  			[CCS0]  = GEN11_GRDOM_RENDER,
425  			[CCS1]  = GEN11_GRDOM_RENDER,
426  			[CCS2]  = GEN11_GRDOM_RENDER,
427  			[CCS3]  = GEN11_GRDOM_RENDER,
428  			[GSC0]  = GEN12_GRDOM_GSC,
429  		};
430  		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
431  			   !engine_reset_domains[id]);
432  		reset_domain = engine_reset_domains[id];
433  	} else {
434  		static const u32 engine_reset_domains[] = {
435  			[RCS0]  = GEN6_GRDOM_RENDER,
436  			[BCS0]  = GEN6_GRDOM_BLT,
437  			[VCS0]  = GEN6_GRDOM_MEDIA,
438  			[VCS1]  = GEN8_GRDOM_MEDIA2,
439  			[VECS0] = GEN6_GRDOM_VECS,
440  		};
441  		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
442  			   !engine_reset_domains[id]);
443  		reset_domain = engine_reset_domains[id];
444  	}
445  
446  	return reset_domain;
447  }
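
/*
 * Hedged sketch of how the value above is consumed (the real users live in
 * the reset code, not in this file): an engine-reset path would typically
 * accumulate the per-engine domains into a single mask before poking the
 * reset register, roughly
 *
 *	for_each_engine_masked(engine, gt, engine_mask, tmp)
 *		hw_mask |= engine->reset_domain;
 *
 * and then request the reset with that combined mask.
 */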
448  
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
450  			      u8 logical_instance)
451  {
452  	const struct engine_info *info = &intel_engines[id];
453  	struct drm_i915_private *i915 = gt->i915;
454  	struct intel_engine_cs *engine;
455  	u8 guc_class;
456  
457  	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
458  	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
459  	BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1));
460  	BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1));
461  
462  	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
463  		return -EINVAL;
464  
465  	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
466  		return -EINVAL;
467  
468  	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
469  		return -EINVAL;
470  
471  	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
472  		return -EINVAL;
473  
474  	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
475  	if (!engine)
476  		return -ENOMEM;
477  
478  	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
479  
480  	INIT_LIST_HEAD(&engine->pinned_contexts_list);
481  	engine->id = id;
482  	engine->legacy_idx = INVALID_ENGINE;
483  	engine->mask = BIT(id);
484  	engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
485  						id);
486  	engine->i915 = i915;
487  	engine->gt = gt;
488  	engine->uncore = gt->uncore;
489  	guc_class = engine_class_to_guc_class(info->class);
490  	engine->guc_id = MAKE_GUC_ID(guc_class, info->instance);
491  	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
492  
493  	engine->irq_handler = nop_irq_handler;
494  
495  	engine->class = info->class;
496  	engine->instance = info->instance;
497  	engine->logical_mask = BIT(logical_instance);
498  	__sprint_engine_name(engine);
499  
500  	if ((engine->class == COMPUTE_CLASS || engine->class == RENDER_CLASS) &&
501  	    __ffs(CCS_MASK(engine->gt) | RCS_MASK(engine->gt)) == engine->instance)
502  		engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
503  
504  	/* features common between engines sharing EUs */
505  	if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
506  		engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
507  		engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
508  	}
509  
510  	engine->props.heartbeat_interval_ms =
511  		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
512  	engine->props.max_busywait_duration_ns =
513  		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
514  	engine->props.preempt_timeout_ms =
515  		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
516  	engine->props.stop_timeout_ms =
517  		CONFIG_DRM_I915_STOP_TIMEOUT;
518  	engine->props.timeslice_duration_ms =
519  		CONFIG_DRM_I915_TIMESLICE_DURATION;
520  
521  	/*
522  	 * Mid-thread pre-emption is not available in Gen12. Unfortunately,
523  	 * some compute workloads run quite long threads. That means they get
524  	 * reset due to not pre-empting in a timely manner. So, bump the
525  	 * pre-emption timeout value to be much higher for compute engines.
526  	 */
527  	if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
528  		engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
529  
530  	/* Cap properties according to any system limits */
531  #define CLAMP_PROP(field) \
532  	do { \
533  		u64 clamp = intel_clamp_##field(engine, engine->props.field); \
534  		if (clamp != engine->props.field) { \
535  			drm_notice(&engine->i915->drm, \
536  				   "Warning, clamping %s to %lld to prevent overflow\n", \
537  				   #field, clamp); \
538  			engine->props.field = clamp; \
539  		} \
540  	} while (0)
541  
542  	CLAMP_PROP(heartbeat_interval_ms);
543  	CLAMP_PROP(max_busywait_duration_ns);
544  	CLAMP_PROP(preempt_timeout_ms);
545  	CLAMP_PROP(stop_timeout_ms);
546  	CLAMP_PROP(timeslice_duration_ms);
547  
548  #undef CLAMP_PROP
549  
550  	engine->defaults = engine->props; /* never to change again */
551  
552  	engine->context_size = intel_engine_context_size(gt, engine->class);
553  	if (WARN_ON(engine->context_size > BIT(20)))
554  		engine->context_size = 0;
555  	if (engine->context_size)
556  		DRIVER_CAPS(i915)->has_logical_contexts = true;
557  
558  	ewma__engine_latency_init(&engine->latency);
559  
560  	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
561  
562  	/* Scrub mmio state on takeover */
563  	intel_engine_sanitize_mmio(engine);
564  
565  	gt->engine_class[info->class][info->instance] = engine;
566  	gt->engine[id] = engine;
567  
568  	return 0;
569  }
570  
u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
572  {
573  	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
574  
575  	return value;
576  }
577  
u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
579  {
580  	value = min(value, jiffies_to_nsecs(2));
581  
582  	return value;
583  }
584  
u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
586  {
587  	/*
588  	 * NB: The GuC API only supports 32bit values. However, the limit is further
589  	 * reduced due to internal calculations which would otherwise overflow.
590  	 */
591  	if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
592  		value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
593  
594  	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
595  
596  	return value;
597  }
598  
u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
600  {
601  	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
602  
603  	return value;
604  }
605  
u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
607  {
608  	/*
609  	 * NB: The GuC API only supports 32bit values. However, the limit is further
610  	 * reduced due to internal calculations which would otherwise overflow.
611  	 */
612  	if (intel_guc_submission_is_wanted(gt_to_guc(engine->gt)))
613  		value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
614  
615  	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
616  
617  	return value;
618  }
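
/*
 * The intel_clamp_*() helpers above back the CLAMP_PROP() checks at engine
 * setup time and, as this note assumes, are also used when the properties are
 * adjusted later at runtime, so any new engine property should gain a
 * matching clamp helper.
 */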
619  
static void __setup_engine_capabilities(struct intel_engine_cs *engine)
621  {
622  	struct drm_i915_private *i915 = engine->i915;
623  
624  	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on the first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
629  		if (GRAPHICS_VER(i915) >= 11 ||
630  		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
631  			engine->uabi_capabilities |=
632  				I915_VIDEO_CLASS_CAPABILITY_HEVC;
633  
634  		/*
635  		 * SFC block is present only on even logical engine
636  		 * instances.
637  		 */
638  		if ((GRAPHICS_VER(i915) >= 11 &&
639  		     (engine->gt->info.vdbox_sfc_access &
640  		      BIT(engine->instance))) ||
641  		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
642  			engine->uabi_capabilities |=
643  				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
644  	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
645  		if (GRAPHICS_VER(i915) >= 9 &&
646  		    engine->gt->info.sfc_mask & BIT(engine->instance))
647  			engine->uabi_capabilities |=
648  				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
649  	}
650  }
651  
static void intel_setup_engine_capabilities(struct intel_gt *gt)
653  {
654  	struct intel_engine_cs *engine;
655  	enum intel_engine_id id;
656  
657  	for_each_engine(engine, gt, id)
658  		__setup_engine_capabilities(engine);
659  }
660  
661  /**
662   * intel_engines_release() - free the resources allocated for Command Streamers
663   * @gt: pointer to struct intel_gt
664   */
void intel_engines_release(struct intel_gt *gt)
666  {
667  	struct intel_engine_cs *engine;
668  	enum intel_engine_id id;
669  
670  	/*
671  	 * Before we release the resources held by engine, we must be certain
672  	 * that the HW is no longer accessing them -- having the GPU scribble
673  	 * to or read from a page being used for something else causes no end
674  	 * of fun.
675  	 *
676  	 * The GPU should be reset by this point, but assume the worst just
677  	 * in case we aborted before completely initialising the engines.
678  	 */
679  	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
680  	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
681  		intel_gt_reset_all_engines(gt);
682  
683  	/* Decouple the backend; but keep the layout for late GPU resets */
684  	for_each_engine(engine, gt, id) {
685  		if (!engine->release)
686  			continue;
687  
688  		intel_wakeref_wait_for_idle(&engine->wakeref);
689  		GEM_BUG_ON(intel_engine_pm_is_awake(engine));
690  
691  		engine->release(engine);
692  		engine->release = NULL;
693  
694  		memset(&engine->reset, 0, sizeof(engine->reset));
695  	}
696  
697  	llist_del_all(&gt->i915->uabi_engines_llist);
698  }
699  
void intel_engine_free_request_pool(struct intel_engine_cs *engine)
701  {
702  	if (!engine->request_pool)
703  		return;
704  
705  	kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
706  }
707  
void intel_engines_free(struct intel_gt *gt)
709  {
710  	struct intel_engine_cs *engine;
711  	enum intel_engine_id id;
712  
713  	/* Free the requests! dma-resv keeps fences around for an eternity */
714  	rcu_barrier();
715  
716  	for_each_engine(engine, gt, id) {
717  		intel_engine_free_request_pool(engine);
718  		kfree(engine);
719  		gt->engine[id] = NULL;
720  	}
721  }
722  
723  static
bool gen11_vdbox_has_sfc(struct intel_gt *gt,
725  			 unsigned int physical_vdbox,
726  			 unsigned int logical_vdbox, u16 vdbox_mask)
727  {
728  	struct drm_i915_private *i915 = gt->i915;
729  
	/*
	 * In Gen11, only even numbered logical VDBOXes are hooked
	 * up to an SFC (Scaler & Format Converter) unit.
	 * In Gen12, even numbered physical instances are always connected
	 * to an SFC. Odd numbered physical instances have an SFC only if
	 * the previous even instance is fused off.
	 *
	 * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
	 * in the fuse register that tells us whether a specific SFC is present.
	 */
740  	if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
741  		return false;
742  	else if (MEDIA_VER(i915) >= 12)
743  		return (physical_vdbox % 2 == 0) ||
744  			!(BIT(physical_vdbox - 1) & vdbox_mask);
745  	else if (MEDIA_VER(i915) == 11)
746  		return logical_vdbox % 2 == 0;
747  
748  	return false;
749  }
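
/*
 * Worked example of the rules above (hypothetical fuse values): on media
 * version 12 with sfc_mask = 0x1 and vdbox_mask = 0b0010, physical VCS1 is
 * odd but its even sibling VCS0 is fused off, so VCS1 does get the SFC;
 * with vdbox_mask = 0b0011 it would not.
 */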
750  
static void engine_mask_apply_media_fuses(struct intel_gt *gt)
752  {
753  	struct drm_i915_private *i915 = gt->i915;
754  	unsigned int logical_vdbox = 0;
755  	unsigned int i;
756  	u32 media_fuse, fuse1;
757  	u16 vdbox_mask;
758  	u16 vebox_mask;
759  
760  	if (MEDIA_VER(gt->i915) < 11)
761  		return;
762  
	/*
	 * On newer platforms the fusing register is called 'enable' and has
	 * enable semantics, while on older platforms it is called 'disable'
	 * and its bits have disable semantics.
	 */
768  	media_fuse = intel_uncore_read(gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
769  	if (MEDIA_VER_FULL(i915) < IP_VER(12, 55))
770  		media_fuse = ~media_fuse;
771  
772  	vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
773  	vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
774  		      GEN11_GT_VEBOX_DISABLE_SHIFT;
775  
776  	if (MEDIA_VER_FULL(i915) >= IP_VER(12, 55)) {
777  		fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
778  		gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
779  	} else {
780  		gt->info.sfc_mask = ~0;
781  	}
782  
783  	for (i = 0; i < I915_MAX_VCS; i++) {
784  		if (!HAS_ENGINE(gt, _VCS(i))) {
785  			vdbox_mask &= ~BIT(i);
786  			continue;
787  		}
788  
789  		if (!(BIT(i) & vdbox_mask)) {
790  			gt->info.engine_mask &= ~BIT(_VCS(i));
791  			gt_dbg(gt, "vcs%u fused off\n", i);
792  			continue;
793  		}
794  
795  		if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask))
796  			gt->info.vdbox_sfc_access |= BIT(i);
797  		logical_vdbox++;
798  	}
799  	gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
800  	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
801  
802  	for (i = 0; i < I915_MAX_VECS; i++) {
803  		if (!HAS_ENGINE(gt, _VECS(i))) {
804  			vebox_mask &= ~BIT(i);
805  			continue;
806  		}
807  
808  		if (!(BIT(i) & vebox_mask)) {
809  			gt->info.engine_mask &= ~BIT(_VECS(i));
810  			gt_dbg(gt, "vecs%u fused off\n", i);
811  		}
812  	}
813  	gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
814  	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
815  }
816  
static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
818  {
819  	struct drm_i915_private *i915 = gt->i915;
820  	struct intel_gt_info *info = &gt->info;
821  	int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
822  	unsigned long ccs_mask;
823  	unsigned int i;
824  
825  	if (GRAPHICS_VER(i915) < 11)
826  		return;
827  
828  	if (hweight32(CCS_MASK(gt)) <= 1)
829  		return;
830  
831  	ccs_mask = intel_slicemask_from_xehp_dssmask(info->sseu.compute_subslice_mask,
832  						     ss_per_ccs);
833  	/*
834  	 * If all DSS in a quadrant are fused off, the corresponding CCS
835  	 * engine is not available for use.
836  	 */
837  	for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
838  		info->engine_mask &= ~BIT(_CCS(i));
839  		gt_dbg(gt, "ccs%u fused off\n", i);
840  	}
841  }
842  
/*
 * Determine which engines are fused off in our particular hardware.
 * Note that we have a catch-22 situation where we need to be able to access
 * the blitter forcewake domain to read the engine fuses, but at the same time
 * we need to know which engines are available on the system to know which
 * forcewake domains are present. We solve this by initializing the forcewake
 * domains based on the full engine mask in the platform capabilities before
 * calling this function and pruning the domains for fused-off engines
 * afterwards.
 */
static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
854  {
855  	struct intel_gt_info *info = &gt->info;
856  
857  	GEM_BUG_ON(!info->engine_mask);
858  
859  	engine_mask_apply_media_fuses(gt);
860  	engine_mask_apply_compute_fuses(gt);
861  
862  	/*
863  	 * The only use of the GSC CS is to load and communicate with the GSC
864  	 * FW, so we have no use for it if we don't have the FW.
865  	 *
866  	 * IMPORTANT: in cases where we don't have the GSC FW, we have a
867  	 * catch-22 situation that breaks media C6 due to 2 requirements:
868  	 * 1) once turned on, the GSC power well will not go to sleep unless the
869  	 *    GSC FW is loaded.
870  	 * 2) to enable idling (which is required for media C6) we need to
871  	 *    initialize the IDLE_MSG register for the GSC CS and do at least 1
872  	 *    submission, which will wake up the GSC power well.
873  	 */
874  	if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
875  		gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
876  		info->engine_mask &= ~BIT(GSC0);
877  	}
878  
	/*
	 * Do not create the command streamer for CCS slices beyond the first.
	 * All workloads submitted to the first engine will be shared among
	 * all the slices.
	 *
	 * Once the user is allowed to customize the CCS mode, this check
	 * needs to be removed.
	 */
887  	if (IS_DG2(gt->i915)) {
888  		u8 first_ccs = __ffs(CCS_MASK(gt));
889  
890  		/*
891  		 * Store the number of active cslices before
892  		 * changing the CCS engine configuration
893  		 */
894  		gt->ccs.cslices = CCS_MASK(gt);
895  
		/* Mask off all the CCS engines */
897  		info->engine_mask &= ~GENMASK(CCS3, CCS0);
898  		/* Put back in the first CCS engine */
899  		info->engine_mask |= BIT(_CCS(first_ccs));
900  	}
901  
902  	return info->engine_mask;
903  }
904  
static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
906  				 u8 class, const u8 *map, u8 num_instances)
907  {
908  	int i, j;
909  	u8 current_logical_id = 0;
910  
911  	for (j = 0; j < num_instances; ++j) {
912  		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
913  			if (!HAS_ENGINE(gt, i) ||
914  			    intel_engines[i].class != class)
915  				continue;
916  
917  			if (intel_engines[i].instance == map[j]) {
918  				logical_ids[intel_engines[i].instance] =
919  					current_logical_id++;
920  				break;
921  			}
922  		}
923  	}
924  }
925  
static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
927  {
	/*
	 * A logical to physical mapping is needed for proper support
	 * of the split-frame feature.
	 */
932  	if (MEDIA_VER(gt->i915) >= 11 && class == VIDEO_DECODE_CLASS) {
933  		const u8 map[] = { 0, 2, 4, 6, 1, 3, 5, 7 };
934  
935  		populate_logical_ids(gt, logical_ids, class,
936  				     map, ARRAY_SIZE(map));
937  	} else {
938  		int i;
939  		u8 map[MAX_ENGINE_INSTANCE + 1];
940  
941  		for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
942  			map[i] = i;
943  		populate_logical_ids(gt, logical_ids, class,
944  				     map, ARRAY_SIZE(map));
945  	}
946  }
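
/*
 * Worked example of the mapping above (hypothetical fusing): with the video
 * decode map { 0, 2, 4, 6, 1, 3, 5, 7 } and only VCS0 and VCS2 present,
 * populate_logical_ids() assigns logical id 0 to physical instance 0 and
 * logical id 1 to physical instance 2, keeping the logical space dense.
 */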
947  
948  /**
949   * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
950   * @gt: pointer to struct intel_gt
951   *
952   * Return: non-zero if the initialization failed.
953   */
int intel_engines_init_mmio(struct intel_gt *gt)
955  {
956  	struct drm_i915_private *i915 = gt->i915;
957  	const unsigned int engine_mask = init_engine_mask(gt);
958  	unsigned int mask = 0;
959  	unsigned int i, class;
960  	u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
961  	int err;
962  
963  	drm_WARN_ON(&i915->drm, engine_mask == 0);
964  	drm_WARN_ON(&i915->drm, engine_mask &
965  		    GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
966  
967  	if (i915_inject_probe_failure(i915))
968  		return -ENODEV;
969  
970  	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
971  		setup_logical_ids(gt, logical_ids, class);
972  
973  		for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
974  			u8 instance = intel_engines[i].instance;
975  
976  			if (intel_engines[i].class != class ||
977  			    !HAS_ENGINE(gt, i))
978  				continue;
979  
980  			err = intel_engine_setup(gt, i,
981  						 logical_ids[instance]);
982  			if (err)
983  				goto cleanup;
984  
985  			mask |= BIT(i);
986  		}
987  	}
988  
	/*
	 * Catch failures to update the intel_engines table when new engines
	 * are added to the driver: warn and disable the forgotten engines.
	 */
994  	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
995  		gt->info.engine_mask = mask;
996  
997  	gt->info.num_engines = hweight32(mask);
998  
999  	intel_gt_check_and_clear_faults(gt);
1000  
1001  	intel_setup_engine_capabilities(gt);
1002  
1003  	intel_uncore_prune_engine_fw_domains(gt->uncore, gt);
1004  
1005  	return 0;
1006  
1007  cleanup:
1008  	intel_engines_free(gt);
1009  	return err;
1010  }
1011  
void intel_engine_init_execlists(struct intel_engine_cs *engine)
1013  {
1014  	struct intel_engine_execlists * const execlists = &engine->execlists;
1015  
1016  	execlists->port_mask = 1;
1017  	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
1018  	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
1019  
1020  	memset(execlists->pending, 0, sizeof(execlists->pending));
1021  	execlists->active =
1022  		memset(execlists->inflight, 0, sizeof(execlists->inflight));
1023  }
1024  
static void cleanup_status_page(struct intel_engine_cs *engine)
1026  {
1027  	struct i915_vma *vma;
1028  
1029  	/* Prevent writes into HWSP after returning the page to the system */
1030  	intel_engine_set_hwsp_writemask(engine, ~0u);
1031  
1032  	vma = fetch_and_zero(&engine->status_page.vma);
1033  	if (!vma)
1034  		return;
1035  
1036  	if (!HWS_NEEDS_PHYSICAL(engine->i915))
1037  		i915_vma_unpin(vma);
1038  
1039  	i915_gem_object_unpin_map(vma->obj);
1040  	i915_gem_object_put(vma->obj);
1041  }
1042  
static int pin_ggtt_status_page(struct intel_engine_cs *engine,
1044  				struct i915_gem_ww_ctx *ww,
1045  				struct i915_vma *vma)
1046  {
1047  	unsigned int flags;
1048  
1049  	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
1050  		/*
1051  		 * On g33, we cannot place HWS above 256MiB, so
1052  		 * restrict its pinning to the low mappable arena.
1053  		 * Though this restriction is not documented for
1054  		 * gen4, gen5, or byt, they also behave similarly
1055  		 * and hang if the HWS is placed at the top of the
1056  		 * GTT. To generalise, it appears that all !llc
1057  		 * platforms have issues with us placing the HWS
1058  		 * above the mappable region (even though we never
1059  		 * actually map it).
1060  		 */
1061  		flags = PIN_MAPPABLE;
1062  	else
1063  		flags = PIN_HIGH;
1064  
1065  	return i915_ggtt_pin(vma, ww, 0, flags);
1066  }
1067  
static int init_status_page(struct intel_engine_cs *engine)
1069  {
1070  	struct drm_i915_gem_object *obj;
1071  	struct i915_gem_ww_ctx ww;
1072  	struct i915_vma *vma;
1073  	void *vaddr;
1074  	int ret;
1075  
1076  	INIT_LIST_HEAD(&engine->status_page.timelines);
1077  
1078  	/*
1079  	 * Though the HWS register does support 36bit addresses, historically
1080  	 * we have had hangs and corruption reported due to wild writes if
1081  	 * the HWS is placed above 4G. We only allow objects to be allocated
1082  	 * in GFP_DMA32 for i965, and no earlier physical address users had
1083  	 * access to more than 4G.
1084  	 */
1085  	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
1086  	if (IS_ERR(obj)) {
1087  		gt_err(engine->gt, "Failed to allocate status page\n");
1088  		return PTR_ERR(obj);
1089  	}
1090  
1091  	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1092  
1093  	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
1094  	if (IS_ERR(vma)) {
1095  		ret = PTR_ERR(vma);
1096  		goto err_put;
1097  	}
1098  
1099  	i915_gem_ww_ctx_init(&ww, true);
1100  retry:
1101  	ret = i915_gem_object_lock(obj, &ww);
1102  	if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915))
1103  		ret = pin_ggtt_status_page(engine, &ww, vma);
1104  	if (ret)
1105  		goto err;
1106  
1107  	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
1108  	if (IS_ERR(vaddr)) {
1109  		ret = PTR_ERR(vaddr);
1110  		goto err_unpin;
1111  	}
1112  
1113  	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
1114  	engine->status_page.vma = vma;
1115  
1116  err_unpin:
1117  	if (ret)
1118  		i915_vma_unpin(vma);
1119  err:
1120  	if (ret == -EDEADLK) {
1121  		ret = i915_gem_ww_ctx_backoff(&ww);
1122  		if (!ret)
1123  			goto retry;
1124  	}
1125  	i915_gem_ww_ctx_fini(&ww);
1126  err_put:
1127  	if (ret)
1128  		i915_gem_object_put(obj);
1129  	return ret;
1130  }
1131  
static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
1133  {
1134  	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
1135  		[RENDER_CLASS].reg		= GEN8_RTCR,
1136  		[VIDEO_DECODE_CLASS].reg	= GEN8_M1TCR, /* , GEN8_M2TCR */
1137  		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN8_VTCR,
1138  		[COPY_ENGINE_CLASS].reg		= GEN8_BTCR,
1139  	};
1140  	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
1141  		[RENDER_CLASS].reg		= GEN12_GFX_TLB_INV_CR,
1142  		[VIDEO_DECODE_CLASS].reg	= GEN12_VD_TLB_INV_CR,
1143  		[VIDEO_ENHANCEMENT_CLASS].reg	= GEN12_VE_TLB_INV_CR,
1144  		[COPY_ENGINE_CLASS].reg		= GEN12_BLT_TLB_INV_CR,
1145  		[COMPUTE_CLASS].reg		= GEN12_COMPCTX_TLB_INV_CR,
1146  	};
1147  	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
1148  		[RENDER_CLASS].mcr_reg		  = XEHP_GFX_TLB_INV_CR,
1149  		[VIDEO_DECODE_CLASS].mcr_reg	  = XEHP_VD_TLB_INV_CR,
1150  		[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
1151  		[COPY_ENGINE_CLASS].mcr_reg	  = XEHP_BLT_TLB_INV_CR,
1152  		[COMPUTE_CLASS].mcr_reg		  = XEHP_COMPCTX_TLB_INV_CR,
1153  	};
1154  	static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
1155  		[VIDEO_DECODE_CLASS].reg	  = GEN12_VD_TLB_INV_CR,
1156  		[VIDEO_ENHANCEMENT_CLASS].reg     = GEN12_VE_TLB_INV_CR,
1157  		[OTHER_CLASS].reg		  = XELPMP_GSC_TLB_INV_CR,
1158  	};
1159  	struct drm_i915_private *i915 = engine->i915;
1160  	const unsigned int instance = engine->instance;
1161  	const unsigned int class = engine->class;
1162  	const union intel_engine_tlb_inv_reg *regs;
1163  	union intel_engine_tlb_inv_reg reg;
1164  	unsigned int num = 0;
1165  	u32 val;
1166  
	/*
	 * New platforms should not be added with a catch-all-newer (>=)
	 * condition, so that any later platform added triggers the warning
	 * below and in turn mandates a human cross-check of whether the
	 * invalidation flows have compatible semantics.
	 *
	 * For instance, with the 11.00 -> 12.00 transition three out of five
	 * respective engine registers were moved to masked type. Then after
	 * the 12.00 -> 12.50 transition multicast handling is required too.
	 */
1177  
1178  	if (engine->gt->type == GT_MEDIA) {
1179  		if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
1180  			regs = xelpmp_regs;
1181  			num = ARRAY_SIZE(xelpmp_regs);
1182  		}
1183  	} else {
1184  		if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 74) ||
1185  		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
1186  		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
1187  		    GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
1188  			regs = xehp_regs;
1189  			num = ARRAY_SIZE(xehp_regs);
1190  		} else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
1191  			   GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
1192  			regs = gen12_regs;
1193  			num = ARRAY_SIZE(gen12_regs);
1194  		} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
1195  			regs = gen8_regs;
1196  			num = ARRAY_SIZE(gen8_regs);
1197  		} else if (GRAPHICS_VER(i915) < 8) {
1198  			return 0;
1199  		}
1200  	}
1201  
1202  	if (gt_WARN_ONCE(engine->gt, !num,
1203  			 "Platform does not implement TLB invalidation!"))
1204  		return -ENODEV;
1205  
1206  	if (gt_WARN_ON_ONCE(engine->gt,
1207  			    class >= num ||
1208  			    (!regs[class].reg.reg &&
1209  			     !regs[class].mcr_reg.reg)))
1210  		return -ERANGE;
1211  
1212  	reg = regs[class];
1213  
1214  	if (regs == xelpmp_regs && class == OTHER_CLASS) {
1215  		/*
1216  		 * There's only a single GSC instance, but it uses register bit
1217  		 * 1 instead of either 0 or OTHER_GSC_INSTANCE.
1218  		 */
1219  		GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
1220  		val = 1;
1221  	} else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
1222  		reg.reg = GEN8_M2TCR;
1223  		val = 0;
1224  	} else {
1225  		val = instance;
1226  	}
1227  
1228  	val = BIT(val);
1229  
1230  	engine->tlb_inv.mcr = regs == xehp_regs;
1231  	engine->tlb_inv.reg = reg;
1232  	engine->tlb_inv.done = val;
1233  
1234  	if (GRAPHICS_VER(i915) >= 12 &&
1235  	    (engine->class == VIDEO_DECODE_CLASS ||
1236  	     engine->class == VIDEO_ENHANCEMENT_CLASS ||
1237  	     engine->class == COMPUTE_CLASS ||
1238  	     engine->class == OTHER_CLASS))
1239  		engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
1240  	else
1241  		engine->tlb_inv.request = val;
1242  
1243  	return 0;
1244  }
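
/*
 * Hedged sketch of how these fields are consumed (the actual invalidation
 * path lives outside this file): a TLB invalidation is requested by writing
 * engine->tlb_inv.request to engine->tlb_inv.reg (via a multicast write when
 * engine->tlb_inv.mcr is set), and engine->tlb_inv.done is the mask that is
 * subsequently polled to detect completion.
 */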
1245  
static int engine_setup_common(struct intel_engine_cs *engine)
1247  {
1248  	int err;
1249  
1250  	init_llist_head(&engine->barrier_tasks);
1251  
1252  	err = intel_engine_init_tlb_invalidation(engine);
1253  	if (err)
1254  		return err;
1255  
1256  	err = init_status_page(engine);
1257  	if (err)
1258  		return err;
1259  
1260  	engine->breadcrumbs = intel_breadcrumbs_create(engine);
1261  	if (!engine->breadcrumbs) {
1262  		err = -ENOMEM;
1263  		goto err_status;
1264  	}
1265  
1266  	engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL);
1267  	if (!engine->sched_engine) {
1268  		err = -ENOMEM;
1269  		goto err_sched_engine;
1270  	}
1271  	engine->sched_engine->private_data = engine;
1272  
1273  	err = intel_engine_init_cmd_parser(engine);
1274  	if (err)
1275  		goto err_cmd_parser;
1276  
1277  	intel_engine_init_execlists(engine);
1278  	intel_engine_init__pm(engine);
1279  	intel_engine_init_retire(engine);
1280  
1281  	/* Use the whole device by default */
1282  	engine->sseu =
1283  		intel_sseu_from_device_info(&engine->gt->info.sseu);
1284  
1285  	intel_engine_init_workarounds(engine);
1286  	intel_engine_init_whitelist(engine);
1287  	intel_engine_init_ctx_wa(engine);
1288  
1289  	if (GRAPHICS_VER(engine->i915) >= 12)
1290  		engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
1291  
1292  	return 0;
1293  
1294  err_cmd_parser:
1295  	i915_sched_engine_put(engine->sched_engine);
1296  err_sched_engine:
1297  	intel_breadcrumbs_put(engine->breadcrumbs);
1298  err_status:
1299  	cleanup_status_page(engine);
1300  	return err;
1301  }
1302  
1303  struct measure_breadcrumb {
1304  	struct i915_request rq;
1305  	struct intel_ring ring;
1306  	u32 cs[2048];
1307  };
1308  
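/*
 * Emit a throwaway fini breadcrumb into the scratch frame above purely to
 * measure how many dwords the real emission will need; nothing is ever
 * submitted to the hardware here.
 */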
static int measure_breadcrumb_dw(struct intel_context *ce)
1310  {
1311  	struct intel_engine_cs *engine = ce->engine;
1312  	struct measure_breadcrumb *frame;
1313  	int dw;
1314  
1315  	GEM_BUG_ON(!engine->gt->scratch);
1316  
1317  	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1318  	if (!frame)
1319  		return -ENOMEM;
1320  
1321  	frame->rq.i915 = engine->i915;
1322  	frame->rq.engine = engine;
1323  	frame->rq.context = ce;
1324  	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
1325  	frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;
1326  
1327  	frame->ring.vaddr = frame->cs;
1328  	frame->ring.size = sizeof(frame->cs);
1329  	frame->ring.wrap =
1330  		BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
1331  	frame->ring.effective_size = frame->ring.size;
1332  	intel_ring_update_space(&frame->ring);
1333  	frame->rq.ring = &frame->ring;
1334  
1335  	mutex_lock(&ce->timeline->mutex);
1336  	spin_lock_irq(&engine->sched_engine->lock);
1337  
1338  	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
1339  
1340  	spin_unlock_irq(&engine->sched_engine->lock);
1341  	mutex_unlock(&ce->timeline->mutex);
1342  
1343  	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
1344  
1345  	kfree(frame);
1346  	return dw;
1347  }
1348  
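/*
 * Create a context that stays pinned for the lifetime of the engine (see
 * create_kernel_context() and create_ggtt_bind_context() below for the
 * typical callers); @hwsp selects the offset within the engine's status page
 * used for the context's timeline.
 */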
1349  struct intel_context *
intel_engine_create_pinned_context(struct intel_engine_cs *engine,
1351  				   struct i915_address_space *vm,
1352  				   unsigned int ring_size,
1353  				   unsigned int hwsp,
1354  				   struct lock_class_key *key,
1355  				   const char *name)
1356  {
1357  	struct intel_context *ce;
1358  	int err;
1359  
1360  	ce = intel_context_create(engine);
1361  	if (IS_ERR(ce))
1362  		return ce;
1363  
1364  	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
1365  	ce->timeline = page_pack_bits(NULL, hwsp);
1366  	ce->ring = NULL;
1367  	ce->ring_size = ring_size;
1368  
1369  	i915_vm_put(ce->vm);
1370  	ce->vm = i915_vm_get(vm);
1371  
1372  	err = intel_context_pin(ce); /* perma-pin so it is always available */
1373  	if (err) {
1374  		intel_context_put(ce);
1375  		return ERR_PTR(err);
1376  	}
1377  
1378  	list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);
1379  
1380  	/*
1381  	 * Give our perma-pinned kernel timelines a separate lockdep class,
1382  	 * so that we can use them from within the normal user timelines
1383  	 * should we need to inject GPU operations during their request
1384  	 * construction.
1385  	 */
1386  	lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
1387  
1388  	return ce;
1389  }
1390  
void intel_engine_destroy_pinned_context(struct intel_context *ce)
1392  {
1393  	struct intel_engine_cs *engine = ce->engine;
1394  	struct i915_vma *hwsp = engine->status_page.vma;
1395  
1396  	GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);
1397  
1398  	mutex_lock(&hwsp->vm->mutex);
1399  	list_del(&ce->timeline->engine_link);
1400  	mutex_unlock(&hwsp->vm->mutex);
1401  
1402  	list_del(&ce->pinned_contexts_link);
1403  	intel_context_unpin(ce);
1404  	intel_context_put(ce);
1405  }
1406  
1407  static struct intel_context *
create_ggtt_bind_context(struct intel_engine_cs *engine)
1409  {
1410  	static struct lock_class_key kernel;
1411  
	/*
	 * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be
	 * multiple bind requests in flight at a time, so use a bigger ring.
	 */
1416  	return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
1417  						  I915_GEM_HWS_GGTT_BIND_ADDR,
1418  						  &kernel, "ggtt_bind_context");
1419  }
1420  
1421  static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
1423  {
1424  	static struct lock_class_key kernel;
1425  
1426  	return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
1427  						  I915_GEM_HWS_SEQNO_ADDR,
1428  						  &kernel, "kernel_context");
1429  }
1430  
/*
 * engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission-mode-specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
static int engine_init_common(struct intel_engine_cs *engine)
1443  {
1444  	struct intel_context *ce, *bce = NULL;
1445  	int ret;
1446  
1447  	engine->set_default_submission(engine);
1448  
1449  	/*
1450  	 * We may need to do things with the shrinker which
1451  	 * require us to immediately switch back to the default
1452  	 * context. This can cause a problem as pinning the
1453  	 * default context also requires GTT space which may not
1454  	 * be available. To avoid this we always pin the default
1455  	 * context.
1456  	 */
1457  	ce = create_kernel_context(engine);
1458  	if (IS_ERR(ce))
1459  		return PTR_ERR(ce);
	/*
	 * Create a separate pinned context for GGTT updates with the blitter
	 * engine if the platform requires such a service. MI_UPDATE_GTT works
	 * on other engines as well, but BCS should be the least busy engine,
	 * so pick that one for GGTT updates.
	 */
1466  	if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) {
1467  		bce = create_ggtt_bind_context(engine);
1468  		if (IS_ERR(bce)) {
1469  			ret = PTR_ERR(bce);
1470  			goto err_ce_context;
1471  		}
1472  	}
1473  
1474  	ret = measure_breadcrumb_dw(ce);
1475  	if (ret < 0)
1476  		goto err_bce_context;
1477  
1478  	engine->emit_fini_breadcrumb_dw = ret;
1479  	engine->kernel_context = ce;
1480  	engine->bind_context = bce;
1481  
1482  	return 0;
1483  
1484  err_bce_context:
1485  	if (bce)
1486  		intel_engine_destroy_pinned_context(bce);
1487  err_ce_context:
1488  	intel_engine_destroy_pinned_context(ce);
1489  	return ret;
1490  }
1491  
int intel_engines_init(struct intel_gt *gt)
1493  {
1494  	int (*setup)(struct intel_engine_cs *engine);
1495  	struct intel_engine_cs *engine;
1496  	enum intel_engine_id id;
1497  	int err;
1498  
1499  	if (intel_uc_uses_guc_submission(&gt->uc)) {
1500  		gt->submission_method = INTEL_SUBMISSION_GUC;
1501  		setup = intel_guc_submission_setup;
1502  	} else if (HAS_EXECLISTS(gt->i915)) {
1503  		gt->submission_method = INTEL_SUBMISSION_ELSP;
1504  		setup = intel_execlists_submission_setup;
1505  	} else {
1506  		gt->submission_method = INTEL_SUBMISSION_RING;
1507  		setup = intel_ring_submission_setup;
1508  	}
1509  
1510  	for_each_engine(engine, gt, id) {
1511  		err = engine_setup_common(engine);
1512  		if (err)
1513  			return err;
1514  
1515  		err = setup(engine);
1516  		if (err) {
1517  			intel_engine_cleanup_common(engine);
1518  			return err;
1519  		}
1520  
1521  		/* The backend should now be responsible for cleanup */
1522  		GEM_BUG_ON(engine->release == NULL);
1523  
1524  		err = engine_init_common(engine);
1525  		if (err)
1526  			return err;
1527  
1528  		intel_engine_add_user(engine);
1529  	}
1530  
1531  	return 0;
1532  }
1533  
/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 *                               the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
1542  {
1543  	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
1544  
1545  	i915_sched_engine_put(engine->sched_engine);
1546  	intel_breadcrumbs_put(engine->breadcrumbs);
1547  
1548  	intel_engine_fini_retire(engine);
1549  	intel_engine_cleanup_cmd_parser(engine);
1550  
1551  	if (engine->default_state)
1552  		fput(engine->default_state);
1553  
1554  	if (engine->kernel_context)
1555  		intel_engine_destroy_pinned_context(engine->kernel_context);
1556  
1557  	if (engine->bind_context)
1558  		intel_engine_destroy_pinned_context(engine->bind_context);
1559  
1561  	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
1562  	cleanup_status_page(engine);
1563  
1564  	intel_wa_list_free(&engine->ctx_wa_list);
1565  	intel_wa_list_free(&engine->wa_list);
1566  	intel_wa_list_free(&engine->whitelist);
1567  }
1568  
1569  /**
1570   * intel_engine_resume - re-initializes the HW state of the engine
1571   * @engine: Engine to resume.
1572   *
1573   * Returns zero on success or an error code on failure.
1574   */
int intel_engine_resume(struct intel_engine_cs *engine)
1576  {
1577  	intel_engine_apply_workarounds(engine);
1578  	intel_engine_apply_whitelist(engine);
1579  
1580  	return engine->resume(engine);
1581  }
1582  
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
1584  {
1585  	struct drm_i915_private *i915 = engine->i915;
1586  
1587  	u64 acthd;
1588  
1589  	if (GRAPHICS_VER(i915) >= 8)
1590  		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
1591  	else if (GRAPHICS_VER(i915) >= 4)
1592  		acthd = ENGINE_READ(engine, RING_ACTHD);
1593  	else
1594  		acthd = ENGINE_READ(engine, ACTHD);
1595  
1596  	return acthd;
1597  }
1598  
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
1600  {
1601  	u64 bbaddr;
1602  
1603  	if (GRAPHICS_VER(engine->i915) >= 8)
1604  		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
1605  	else
1606  		bbaddr = ENGINE_READ(engine, RING_BBADDR);
1607  
1608  	return bbaddr;
1609  }
1610  
static unsigned long stop_timeout(const struct intel_engine_cs *engine)
1612  {
1613  	if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
1614  		return 0;
1615  
1616  	/*
1617  	 * If we are doing a normal GPU reset, we can take our time and allow
1618  	 * the engine to quiesce. We've stopped submission to the engine, and
1619  	 * if we wait long enough an innocent context should complete and
1620  	 * leave the engine idle. So they should not be caught unaware by
1621  	 * the forthcoming GPU reset (which usually follows the stop_cs)!
1622  	 */
1623  	return READ_ONCE(engine->props.stop_timeout_ms);
1624  }
1625  
static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
1627  				  int fast_timeout_us,
1628  				  int slow_timeout_ms)
1629  {
1630  	struct intel_uncore *uncore = engine->uncore;
1631  	const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
1632  	int err;
1633  
1634  	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
1635  
1636  	/*
1637  	 * Wa_22011802037: Prior to doing a reset, ensure CS is
1638  	 * stopped, set ring stop bit and prefetch disable bit to halt CS
1639  	 */
1640  	if (intel_engine_reset_needs_wa_22011802037(engine->gt))
1641  		intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
1642  				      _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
1643  
1644  	err = __intel_wait_for_register_fw(engine->uncore, mode,
1645  					   MODE_IDLE, MODE_IDLE,
1646  					   fast_timeout_us,
1647  					   slow_timeout_ms,
1648  					   NULL);
1649  
1650  	/* A final mmio read to let GPU writes be hopefully flushed to memory */
1651  	intel_uncore_posting_read_fw(uncore, mode);
1652  	return err;
1653  }
1654  
1655  int intel_engine_stop_cs(struct intel_engine_cs *engine)
1656  {
1657  	int err = 0;
1658  
1659  	if (GRAPHICS_VER(engine->i915) < 3)
1660  		return -ENODEV;
1661  
1662  	ENGINE_TRACE(engine, "\n");
1663  	/*
1664  	 * TODO: Find out why occasionally stopping the CS times out. Seen
1665  	 * especially with gem_eio tests.
1666  	 *
1667  	 * Occasionally trying to stop the cs times out, but does not adversely
1668  	 * affect functionality. The timeout is set as a config parameter that
1669  	 * defaults to 100ms. In most cases the follow-up operation is to wait
1670  	 * for pending MI_FORCE_WAKEs. The assumption is that this timeout is
1671  	 * sufficient for any pending MI_FORCE_WAKEs to complete. Once root
1672  	 * caused, the caller must check and handle the return from this
1673  	 * function.
1674  	 */
1675  	if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
1676  		ENGINE_TRACE(engine,
1677  			     "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
1678  			     ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
1679  			     ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);
1680  
1681  		/*
1682  		 * Sometimes we observe that the idle flag is not
1683  		 * set even though the ring is empty. So double
1684  		 * check before giving up.
1685  		 */
1686  		if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
1687  		    (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
1688  			err = -ETIMEDOUT;
1689  	}
1690  
1691  	return err;
1692  }
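
/*
 * Hedged sketch of the "follow-up operation" mentioned in the TODO above:
 * stop the CS, drain any MI_FORCE_WAKEs the CS initiated, perform the reset,
 * then release the ring-stop. The real caller lives in the reset/submission
 * code, so treat this purely as an illustration.
 *
 *	if (intel_engine_stop_cs(engine))
 *		ENGINE_TRACE(engine, "timed out stopping CS\n");
 *
 *	intel_engine_wait_for_pending_mi_fw(engine);
 *
 *	... reset the engine (or the whole GT) here ...
 *
 *	intel_engine_cancel_stop_cs(engine);
 */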
1693  
1694  void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
1695  {
1696  	ENGINE_TRACE(engine, "\n");
1697  
1698  	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
1699  }
1700  
1701  static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
1702  {
1703  	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
1704  		[RCS0] = MSG_IDLE_CS,
1705  		[BCS0] = MSG_IDLE_BCS,
1706  		[VCS0] = MSG_IDLE_VCS0,
1707  		[VCS1] = MSG_IDLE_VCS1,
1708  		[VCS2] = MSG_IDLE_VCS2,
1709  		[VCS3] = MSG_IDLE_VCS3,
1710  		[VCS4] = MSG_IDLE_VCS4,
1711  		[VCS5] = MSG_IDLE_VCS5,
1712  		[VCS6] = MSG_IDLE_VCS6,
1713  		[VCS7] = MSG_IDLE_VCS7,
1714  		[VECS0] = MSG_IDLE_VECS0,
1715  		[VECS1] = MSG_IDLE_VECS1,
1716  		[VECS2] = MSG_IDLE_VECS2,
1717  		[VECS3] = MSG_IDLE_VECS3,
1718  		[CCS0] = MSG_IDLE_CS,
1719  		[CCS1] = MSG_IDLE_CS,
1720  		[CCS2] = MSG_IDLE_CS,
1721  		[CCS3] = MSG_IDLE_CS,
1722  	};
1723  	u32 val;
1724  
1725  	if (!_reg[engine->id].reg)
1726  		return 0;
1727  
1728  	val = intel_uncore_read(engine->uncore, _reg[engine->id]);
1729  
1730  	/* pending bits[13:9], masked by bits[29:25], then shifted down to bit 0 */
1731  	return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
1732  }
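
/*
 * Worked example of the masking above, assuming MSG_IDLE_FW_MASK covers
 * bits[13:9] and MSG_IDLE_FW_SHIFT is 9 (consistent with the comments in this
 * file): if MSG_IDLE reads 0x02000200, both bit 25 (mask) and bit 9 (pending)
 * are set, so
 *
 *	val & (val >> 16)		== 0x00000200
 *	 ... & MSG_IDLE_FW_MASK		== 0x00000200
 *	 ... >> MSG_IDLE_FW_SHIFT	== 0x1
 *
 * i.e. a single pending forcewake is reported in bit 0 of the return value.
 */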
1733  
1734  static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
1735  {
1736  	int ret;
1737  
1738  	/* Ensure GPM receives fw up/down after CS is stopped */
1739  	udelay(1);
1740  
1741  	/* Wait for forcewake request to complete in GPM */
1742  	ret =  __intel_wait_for_register_fw(gt->uncore,
1743  					    GEN9_PWRGT_DOMAIN_STATUS,
1744  					    fw_mask, fw_mask, 5000, 0, NULL);
1745  
1746  	/* Ensure CS receives fw ack from GPM */
1747  	udelay(1);
1748  
1749  	if (ret)
1750  		GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
1751  }
1752  
1753  /*
1754   * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
1755   * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
1756   * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
1757   * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
1758   * are concerned only with the gt reset here, we use a logical OR of pending
1759   * forcewakeups from all reset domains and then wait for them to complete by
1760   * querying PWRGT_DOMAIN_STATUS.
1761   */
1762  void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
1763  {
1764  	u32 fw_pending = __cs_pending_mi_force_wakes(engine);
1765  
1766  	if (fw_pending)
1767  		__gpm_wait_for_fw_complete(engine->gt, fw_pending);
1768  }
1769  
1770  /* NB: please notice the memset */
1771  void intel_engine_get_instdone(const struct intel_engine_cs *engine,
1772  			       struct intel_instdone *instdone)
1773  {
1774  	struct drm_i915_private *i915 = engine->i915;
1775  	struct intel_uncore *uncore = engine->uncore;
1776  	u32 mmio_base = engine->mmio_base;
1777  	int slice;
1778  	int subslice;
1779  	int iter;
1780  
1781  	memset(instdone, 0, sizeof(*instdone));
1782  
1783  	if (GRAPHICS_VER(i915) >= 8) {
1784  		instdone->instdone =
1785  			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1786  
1787  		if (engine->id != RCS0)
1788  			return;
1789  
1790  		instdone->slice_common =
1791  			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1792  		if (GRAPHICS_VER(i915) >= 12) {
1793  			instdone->slice_common_extra[0] =
1794  				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
1795  			instdone->slice_common_extra[1] =
1796  				intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
1797  		}
1798  
1799  		for_each_ss_steering(iter, engine->gt, slice, subslice) {
1800  			instdone->sampler[slice][subslice] =
1801  				intel_gt_mcr_read(engine->gt,
1802  						  GEN8_SAMPLER_INSTDONE,
1803  						  slice, subslice);
1804  			instdone->row[slice][subslice] =
1805  				intel_gt_mcr_read(engine->gt,
1806  						  GEN8_ROW_INSTDONE,
1807  						  slice, subslice);
1808  		}
1809  
1810  		if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) {
1811  			for_each_ss_steering(iter, engine->gt, slice, subslice)
1812  				instdone->geom_svg[slice][subslice] =
1813  					intel_gt_mcr_read(engine->gt,
1814  							  XEHPG_INSTDONE_GEOM_SVG,
1815  							  slice, subslice);
1816  		}
1817  	} else if (GRAPHICS_VER(i915) >= 7) {
1818  		instdone->instdone =
1819  			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1820  
1821  		if (engine->id != RCS0)
1822  			return;
1823  
1824  		instdone->slice_common =
1825  			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1826  		instdone->sampler[0][0] =
1827  			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
1828  		instdone->row[0][0] =
1829  			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
1830  	} else if (GRAPHICS_VER(i915) >= 4) {
1831  		instdone->instdone =
1832  			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1833  		if (engine->id == RCS0)
1834  			/* HACK: Using the wrong struct member */
1835  			instdone->slice_common =
1836  				intel_uncore_read(uncore, GEN4_INSTDONE1);
1837  	} else {
1838  		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1839  	}
1840  }
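
/*
 * Minimal usage sketch (illustrative; the error-capture code does something
 * similar): snapshot the INSTDONE state into a local and report the summary
 * register.
 *
 *	struct intel_instdone instdone;
 *
 *	intel_engine_get_instdone(engine, &instdone);
 *	drm_printf(m, "INSTDONE: 0x%08x\n", instdone.instdone);
 */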
1841  
1842  static bool ring_is_idle(struct intel_engine_cs *engine)
1843  {
1844  	bool idle = true;
1845  
1846  	if (I915_SELFTEST_ONLY(!engine->mmio_base))
1847  		return true;
1848  
1849  	if (!intel_engine_pm_get_if_awake(engine))
1850  		return true;
1851  
1852  	/* First check that no commands are left in the ring */
1853  	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1854  	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1855  		idle = false;
1856  
1857  	/* No bit for gen2, so assume the CS parser is idle */
1858  	if (GRAPHICS_VER(engine->i915) > 2 &&
1859  	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1860  		idle = false;
1861  
1862  	intel_engine_pm_put(engine);
1863  
1864  	return idle;
1865  }
1866  
1867  void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
1868  {
1869  	struct tasklet_struct *t = &engine->sched_engine->tasklet;
1870  
1871  	if (!t->callback)
1872  		return;
1873  
1874  	local_bh_disable();
1875  	if (tasklet_trylock(t)) {
1876  		/* Must wait for any GPU reset in progress. */
1877  		if (__tasklet_is_enabled(t))
1878  			t->callback(t);
1879  		tasklet_unlock(t);
1880  	}
1881  	local_bh_enable();
1882  
1883  	/* Synchronise and wait for the tasklet on another CPU */
1884  	if (sync)
1885  		tasklet_unlock_wait(t);
1886  }
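
/*
 * Editorial note: the intel_engine_flush_submission() helper used later in
 * this file (e.g. in intel_engine_is_idle()) is assumed to be a thin wrapper
 * equivalent to
 *
 *	__intel_engine_flush_submission(engine, true);
 *
 * i.e. kick the submission tasklet and also wait for a run already in
 * progress on another CPU to complete.
 */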
1887  
1888  /**
1889   * intel_engine_is_idle() - Report if the engine has finished processing all work
1890   * @engine: the intel_engine_cs
1891   *
1892   * Returns true if there are no requests pending, nothing is left to be
1893   * submitted to the hardware, and the engine is idle.
1894   */
1895  bool intel_engine_is_idle(struct intel_engine_cs *engine)
1896  {
1897  	/* More white lies: if wedged, the HW state is inconsistent */
1898  	if (intel_gt_is_wedged(engine->gt))
1899  		return true;
1900  
1901  	if (!intel_engine_pm_is_awake(engine))
1902  		return true;
1903  
1904  	/* Waiting to drain ELSP? */
1905  	intel_synchronize_hardirq(engine->i915);
1906  	intel_engine_flush_submission(engine);
1907  
1908  	/* ELSP is empty, but there are ready requests? E.g. after reset */
1909  	if (!i915_sched_engine_is_empty(engine->sched_engine))
1910  		return false;
1911  
1912  	/* Ring stopped? */
1913  	return ring_is_idle(engine);
1914  }
1915  
1916  bool intel_engines_are_idle(struct intel_gt *gt)
1917  {
1918  	struct intel_engine_cs *engine;
1919  	enum intel_engine_id id;
1920  
1921  	/*
1922  	 * If the driver is wedged, HW state may be very inconsistent and
1923  	 * report that it is still busy, even though we have stopped using it.
1924  	 */
1925  	if (intel_gt_is_wedged(gt))
1926  		return true;
1927  
1928  	/* Already parked (and passed an idleness test); must still be idle */
1929  	if (!READ_ONCE(gt->awake))
1930  		return true;
1931  
1932  	for_each_engine(engine, gt, id) {
1933  		if (!intel_engine_is_idle(engine))
1934  			return false;
1935  	}
1936  
1937  	return true;
1938  }
1939  
1940  bool intel_engine_irq_enable(struct intel_engine_cs *engine)
1941  {
1942  	if (!engine->irq_enable)
1943  		return false;
1944  
1945  	/* Caller disables interrupts */
1946  	spin_lock(engine->gt->irq_lock);
1947  	engine->irq_enable(engine);
1948  	spin_unlock(engine->gt->irq_lock);
1949  
1950  	return true;
1951  }
1952  
1953  void intel_engine_irq_disable(struct intel_engine_cs *engine)
1954  {
1955  	if (!engine->irq_disable)
1956  		return;
1957  
1958  	/* Caller disables interrupts */
1959  	spin_lock(engine->gt->irq_lock);
1960  	engine->irq_disable(engine);
1961  	spin_unlock(engine->gt->irq_lock);
1962  }
1963  
1964  void intel_engines_reset_default_submission(struct intel_gt *gt)
1965  {
1966  	struct intel_engine_cs *engine;
1967  	enum intel_engine_id id;
1968  
1969  	for_each_engine(engine, gt, id) {
1970  		if (engine->sanitize)
1971  			engine->sanitize(engine);
1972  
1973  		engine->set_default_submission(engine);
1974  	}
1975  }
1976  
1977  bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1978  {
1979  	switch (GRAPHICS_VER(engine->i915)) {
1980  	case 2:
1981  		return false; /* uses physical not virtual addresses */
1982  	case 3:
1983  		/* maybe only uses physical not virtual addresses */
1984  		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1985  	case 4:
1986  		return !IS_I965G(engine->i915); /* who knows! */
1987  	case 6:
1988  		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1989  	default:
1990  		return true;
1991  	}
1992  }
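
/*
 * Hypothetical selftest-style guard built on the helper above: skip engines
 * whose CS cannot reliably write a dword to memory before emitting an
 * MI_STORE_DWORD_IMM based probe.
 *
 *	for_each_engine(engine, gt, id) {
 *		if (!intel_engine_can_store_dword(engine))
 *			continue;
 *
 *		... emit and wait on an MI_STORE_DWORD_IMM test batch ...
 *	}
 */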
1993  
1994  static struct intel_timeline *get_timeline(struct i915_request *rq)
1995  {
1996  	struct intel_timeline *tl;
1997  
1998  	/*
1999  	 * Even though we are holding the engine->sched_engine->lock here, there
2000  	 * is no control over the submission queue per se and we are
2001  	 * inspecting the active state at a random point in time, with an
2002  	 * unknown queue. Play safe and make sure the timeline remains valid.
2003  	 * (Only being used for pretty printing, one extra kref shouldn't
2004  	 * cause a camel stampede!)
2005  	 */
2006  	rcu_read_lock();
2007  	tl = rcu_dereference(rq->timeline);
2008  	if (!kref_get_unless_zero(&tl->kref))
2009  		tl = NULL;
2010  	rcu_read_unlock();
2011  
2012  	return tl;
2013  }
2014  
2015  static int print_ring(char *buf, int sz, struct i915_request *rq)
2016  {
2017  	int len = 0;
2018  
2019  	if (!i915_request_signaled(rq)) {
2020  		struct intel_timeline *tl = get_timeline(rq);
2021  
2022  		len = scnprintf(buf, sz,
2023  				"ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
2024  				i915_ggtt_offset(rq->ring->vma),
2025  				tl ? tl->hwsp_offset : 0,
2026  				hwsp_seqno(rq),
2027  				DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
2028  						      1000 * 1000));
2029  
2030  		if (tl)
2031  			intel_timeline_put(tl);
2032  	}
2033  
2034  	return len;
2035  }
2036  
2037  static void hexdump(struct drm_printer *m, const void *buf, size_t len)
2038  {
2039  	const size_t rowsize = 8 * sizeof(u32);
2040  	const void *prev = NULL;
2041  	bool skip = false;
2042  	size_t pos;
2043  
2044  	for (pos = 0; pos < len; pos += rowsize) {
2045  		char line[128];
2046  
2047  		if (prev && !memcmp(prev, buf + pos, rowsize)) {
2048  			if (!skip) {
2049  				drm_printf(m, "*\n");
2050  				skip = true;
2051  			}
2052  			continue;
2053  		}
2054  
2055  		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
2056  						rowsize, sizeof(u32),
2057  						line, sizeof(line),
2058  						false) >= sizeof(line));
2059  		drm_printf(m, "[%04zx] %s\n", pos, line);
2060  
2061  		prev = buf + pos;
2062  		skip = false;
2063  	}
2064  }
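
/*
 * Approximate sample of the output produced above: rows identical to the
 * previous one are collapsed into a single "*" line, similar to hexdump(1)'s
 * squeeze behaviour.
 *
 *	[0000] 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
 *	*
 *	[0080] 12345678 9abcdef0 00000000 00000000 00000000 00000000 00000000 00000000
 */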
2065  
2066  static const char *repr_timer(const struct timer_list *t)
2067  {
2068  	if (!READ_ONCE(t->expires))
2069  		return "inactive";
2070  
2071  	if (timer_pending(t))
2072  		return "active";
2073  
2074  	return "expired";
2075  }
2076  
2077  static void intel_engine_print_registers(struct intel_engine_cs *engine,
2078  					 struct drm_printer *m)
2079  {
2080  	struct drm_i915_private *i915 = engine->i915;
2081  	struct intel_engine_execlists * const execlists = &engine->execlists;
2082  	u64 addr;
2083  
2084  	if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, 4, 7))
2085  		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
2086  	if (HAS_EXECLISTS(i915)) {
2087  		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
2088  			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
2089  		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
2090  			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
2091  	}
2092  	drm_printf(m, "\tRING_START: 0x%08x\n",
2093  		   ENGINE_READ(engine, RING_START));
2094  	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
2095  		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
2096  	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
2097  		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
2098  	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
2099  		   ENGINE_READ(engine, RING_CTL),
2100  		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
2101  	if (GRAPHICS_VER(engine->i915) > 2) {
2102  		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
2103  			   ENGINE_READ(engine, RING_MI_MODE),
2104  			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
2105  	}
2106  
2107  	if (GRAPHICS_VER(i915) >= 6) {
2108  		drm_printf(m, "\tRING_IMR:   0x%08x\n",
2109  			   ENGINE_READ(engine, RING_IMR));
2110  		drm_printf(m, "\tRING_ESR:   0x%08x\n",
2111  			   ENGINE_READ(engine, RING_ESR));
2112  		drm_printf(m, "\tRING_EMR:   0x%08x\n",
2113  			   ENGINE_READ(engine, RING_EMR));
2114  		drm_printf(m, "\tRING_EIR:   0x%08x\n",
2115  			   ENGINE_READ(engine, RING_EIR));
2116  	}
2117  
2118  	addr = intel_engine_get_active_head(engine);
2119  	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
2120  		   upper_32_bits(addr), lower_32_bits(addr));
2121  	addr = intel_engine_get_last_batch_head(engine);
2122  	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
2123  		   upper_32_bits(addr), lower_32_bits(addr));
2124  	if (GRAPHICS_VER(i915) >= 8)
2125  		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
2126  	else if (GRAPHICS_VER(i915) >= 4)
2127  		addr = ENGINE_READ(engine, RING_DMA_FADD);
2128  	else
2129  		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
2130  	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
2131  		   upper_32_bits(addr), lower_32_bits(addr));
2132  	if (GRAPHICS_VER(i915) >= 4) {
2133  		drm_printf(m, "\tIPEIR: 0x%08x\n",
2134  			   ENGINE_READ(engine, RING_IPEIR));
2135  		drm_printf(m, "\tIPEHR: 0x%08x\n",
2136  			   ENGINE_READ(engine, RING_IPEHR));
2137  	} else {
2138  		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
2139  		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
2140  	}
2141  
2142  	if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) {
2143  		struct i915_request * const *port, *rq;
2144  		const u32 *hws =
2145  			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
2146  		const u8 num_entries = execlists->csb_size;
2147  		unsigned int idx;
2148  		u8 read, write;
2149  
2150  		drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
2151  			   str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)),
2152  			   str_enabled_disabled(!atomic_read(&engine->sched_engine->tasklet.count)),
2153  			   repr_timer(&engine->execlists.preempt),
2154  			   repr_timer(&engine->execlists.timer));
2155  
2156  		read = execlists->csb_head;
2157  		write = READ_ONCE(*execlists->csb_write);
2158  
2159  		drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
2160  			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
2161  			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
2162  			   read, write, num_entries);
2163  
2164  		if (read >= num_entries)
2165  			read = 0;
2166  		if (write >= num_entries)
2167  			write = 0;
2168  		if (read > write)
2169  			write += num_entries;
2170  		while (read < write) {
2171  			idx = ++read % num_entries;
2172  			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
2173  				   idx, hws[idx * 2], hws[idx * 2 + 1]);
2174  		}
2175  
2176  		i915_sched_engine_active_lock_bh(engine->sched_engine);
2177  		rcu_read_lock();
2178  		for (port = execlists->active; (rq = *port); port++) {
2179  			char hdr[160];
2180  			int len;
2181  
2182  			len = scnprintf(hdr, sizeof(hdr),
2183  					"\t\tActive[%d]:  ccid:%08x%s%s, ",
2184  					(int)(port - execlists->active),
2185  					rq->context->lrc.ccid,
2186  					intel_context_is_closed(rq->context) ? "!" : "",
2187  					intel_context_is_banned(rq->context) ? "*" : "");
2188  			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
2189  			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
2190  			i915_request_show(m, rq, hdr, 0);
2191  		}
2192  		for (port = execlists->pending; (rq = *port); port++) {
2193  			char hdr[160];
2194  			int len;
2195  
2196  			len = scnprintf(hdr, sizeof(hdr),
2197  					"\t\tPending[%d]: ccid:%08x%s%s, ",
2198  					(int)(port - execlists->pending),
2199  					rq->context->lrc.ccid,
2200  					intel_context_is_closed(rq->context) ? "!" : "",
2201  					intel_context_is_banned(rq->context) ? "*" : "");
2202  			len += print_ring(hdr + len, sizeof(hdr) - len, rq);
2203  			scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
2204  			i915_request_show(m, rq, hdr, 0);
2205  		}
2206  		rcu_read_unlock();
2207  		i915_sched_engine_active_unlock_bh(engine->sched_engine);
2208  	} else if (GRAPHICS_VER(i915) > 6) {
2209  		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
2210  			   ENGINE_READ(engine, RING_PP_DIR_BASE));
2211  		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
2212  			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
2213  		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
2214  			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
2215  	}
2216  }
2217  
2218  static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
2219  {
2220  	struct i915_vma_resource *vma_res = rq->batch_res;
2221  	void *ring;
2222  	int size;
2223  
2224  	drm_printf(m,
2225  		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
2226  		   rq->head, rq->postfix, rq->tail,
2227  		   vma_res ? upper_32_bits(vma_res->start) : ~0u,
2228  		   vma_res ? lower_32_bits(vma_res->start) : ~0u);
2229  
2230  	size = rq->tail - rq->head;
2231  	if (rq->tail < rq->head)
2232  		size += rq->ring->size;
2233  
2234  	ring = kmalloc(size, GFP_ATOMIC);
2235  	if (ring) {
2236  		const void *vaddr = rq->ring->vaddr;
2237  		unsigned int head = rq->head;
2238  		unsigned int len = 0;
2239  
2240  		if (rq->tail < head) {
2241  			len = rq->ring->size - head;
2242  			memcpy(ring, vaddr + head, len);
2243  			head = 0;
2244  		}
2245  		memcpy(ring + len, vaddr + head, size - len);
2246  
2247  		hexdump(m, ring, size);
2248  		kfree(ring);
2249  	}
2250  }
2251  
2252  static unsigned long read_ul(void *p, size_t x)
2253  {
2254  	return *(unsigned long *)(p + x);
2255  }
2256  
2257  static void print_properties(struct intel_engine_cs *engine,
2258  			     struct drm_printer *m)
2259  {
2260  	static const struct pmap {
2261  		size_t offset;
2262  		const char *name;
2263  	} props[] = {
2264  #define P(x) { \
2265  	.offset = offsetof(typeof(engine->props), x), \
2266  	.name = #x \
2267  }
2268  		P(heartbeat_interval_ms),
2269  		P(max_busywait_duration_ns),
2270  		P(preempt_timeout_ms),
2271  		P(stop_timeout_ms),
2272  		P(timeslice_duration_ms),
2273  
2274  		{},
2275  #undef P
2276  	};
2277  	const struct pmap *p;
2278  
2279  	drm_printf(m, "\tProperties:\n");
2280  	for (p = props; p->name; p++)
2281  		drm_printf(m, "\t\t%s: %lu [default %lu]\n",
2282  			   p->name,
2283  			   read_ul(&engine->props, p->offset),
2284  			   read_ul(&engine->defaults, p->offset));
2285  }
2286  
2287  static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg)
2288  {
2289  	struct intel_timeline *tl = get_timeline(rq);
2290  
2291  	i915_request_show(m, rq, msg, 0);
2292  
2293  	drm_printf(m, "\t\tring->start:  0x%08x\n",
2294  		   i915_ggtt_offset(rq->ring->vma));
2295  	drm_printf(m, "\t\tring->head:   0x%08x\n",
2296  		   rq->ring->head);
2297  	drm_printf(m, "\t\tring->tail:   0x%08x\n",
2298  		   rq->ring->tail);
2299  	drm_printf(m, "\t\tring->emit:   0x%08x\n",
2300  		   rq->ring->emit);
2301  	drm_printf(m, "\t\tring->space:  0x%08x\n",
2302  		   rq->ring->space);
2303  
2304  	if (tl) {
2305  		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
2306  			   tl->hwsp_offset);
2307  		intel_timeline_put(tl);
2308  	}
2309  
2310  	print_request_ring(m, rq);
2311  
2312  	if (rq->context->lrc_reg_state) {
2313  		drm_printf(m, "Logical Ring Context:\n");
2314  		hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
2315  	}
2316  }
2317  
2318  void intel_engine_dump_active_requests(struct list_head *requests,
2319  				       struct i915_request *hung_rq,
2320  				       struct drm_printer *m)
2321  {
2322  	struct i915_request *rq;
2323  	const char *msg;
2324  	enum i915_request_state state;
2325  
2326  	list_for_each_entry(rq, requests, sched.link) {
2327  		if (rq == hung_rq)
2328  			continue;
2329  
2330  		state = i915_test_request_state(rq);
2331  		if (state < I915_REQUEST_QUEUED)
2332  			continue;
2333  
2334  		if (state == I915_REQUEST_ACTIVE)
2335  			msg = "\t\tactive on engine";
2336  		else
2337  			msg = "\t\tactive in queue";
2338  
2339  		engine_dump_request(rq, m, msg);
2340  	}
2341  }
2342  
2343  static void engine_dump_active_requests(struct intel_engine_cs *engine,
2344  					struct drm_printer *m)
2345  {
2346  	struct intel_context *hung_ce = NULL;
2347  	struct i915_request *hung_rq = NULL;
2348  
2349  	/*
2350  	 * No need for an engine->irq_seqno_barrier() before the seqno reads.
2351  	 * The GPU is still running so requests are still executing and any
2352  	 * hardware reads will be out of date by the time they are reported.
2353  	 * But the intention here is just to report an instantaneous snapshot
2354  	 * so that's fine.
2355  	 */
2356  	intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
2357  
2358  	drm_printf(m, "\tRequests:\n");
2359  
2360  	if (hung_rq)
2361  		engine_dump_request(hung_rq, m, "\t\thung");
2362  	else if (hung_ce)
2363  		drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
2364  
2365  	if (intel_uc_uses_guc_submission(&engine->gt->uc))
2366  		intel_guc_dump_active_requests(engine, hung_rq, m);
2367  	else
2368  		intel_execlists_dump_active_requests(engine, hung_rq, m);
2369  
2370  	if (hung_rq)
2371  		i915_request_put(hung_rq);
2372  }
2373  
2374  void intel_engine_dump(struct intel_engine_cs *engine,
2375  		       struct drm_printer *m,
2376  		       const char *header, ...)
2377  {
2378  	struct i915_gpu_error * const error = &engine->i915->gpu_error;
2379  	struct i915_request *rq;
2380  	intel_wakeref_t wakeref;
2381  	ktime_t dummy;
2382  
2383  	if (header) {
2384  		va_list ap;
2385  
2386  		va_start(ap, header);
2387  		drm_vprintf(m, header, &ap);
2388  		va_end(ap);
2389  	}
2390  
2391  	if (intel_gt_is_wedged(engine->gt))
2392  		drm_printf(m, "*** WEDGED ***\n");
2393  
2394  	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
2395  	drm_printf(m, "\tBarriers?: %s\n",
2396  		   str_yes_no(!llist_empty(&engine->barrier_tasks)));
2397  	drm_printf(m, "\tLatency: %luus\n",
2398  		   ewma__engine_latency_read(&engine->latency));
2399  	if (intel_engine_supports_stats(engine))
2400  		drm_printf(m, "\tRuntime: %llums\n",
2401  			   ktime_to_ms(intel_engine_get_busy_time(engine,
2402  								  &dummy)));
2403  	drm_printf(m, "\tForcewake: %x domains, %d active\n",
2404  		   engine->fw_domain, READ_ONCE(engine->fw_active));
2405  
2406  	rcu_read_lock();
2407  	rq = READ_ONCE(engine->heartbeat.systole);
2408  	if (rq)
2409  		drm_printf(m, "\tHeartbeat: %d ms ago\n",
2410  			   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
2411  	rcu_read_unlock();
2412  	drm_printf(m, "\tReset count: %d (global %d)\n",
2413  		   i915_reset_engine_count(error, engine),
2414  		   i915_reset_count(error));
2415  	print_properties(engine, m);
2416  
2417  	engine_dump_active_requests(engine, m);
2418  
2419  	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
2420  	wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
2421  	if (wakeref) {
2422  		intel_engine_print_registers(engine, m);
2423  		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
2424  	} else {
2425  		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
2426  	}
2427  
2428  	intel_execlists_show_requests(engine, m, i915_request_show, 8);
2429  
2430  	drm_printf(m, "HWSP:\n");
2431  	hexdump(m, engine->status_page.addr, PAGE_SIZE);
2432  
2433  	drm_printf(m, "Idle? %s\n", str_yes_no(intel_engine_is_idle(engine)));
2434  
2435  	intel_engine_print_breadcrumbs(engine, m);
2436  }
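
/*
 * Typical debugging usage (sketch; drm_info_printer() comes from
 * <drm/drm_print.h>, which this file already includes):
 *
 *	struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 *
 *	for_each_engine(engine, gt, id)
 *		intel_engine_dump(engine, &p, "%s\n", engine->name);
 */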
2437  
2438  /**
2439   * intel_engine_get_busy_time() - Return current accumulated engine busyness
2440   * @engine: engine to report on
2441   * @now: monotonic timestamp of sampling
2442   *
2443   * Returns accumulated time @engine was busy since engine stats were enabled.
2444   */
2445  ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
2446  {
2447  	return engine->busyness(engine, now);
2448  }
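
/*
 * Hedged example of turning two busyness samples into a utilisation figure;
 * sample_ms and busy_pct are illustrative placeholders, and the in-tree PMU
 * code may do this differently.
 *
 *	ktime_t t0, t1, busy0, busy1;
 *	u64 busy_pct;
 *
 *	busy0 = intel_engine_get_busy_time(engine, &t0);
 *	msleep(sample_ms);
 *	busy1 = intel_engine_get_busy_time(engine, &t1);
 *
 *	busy_pct = div64_u64(100 * ktime_to_ns(ktime_sub(busy1, busy0)),
 *			     ktime_to_ns(ktime_sub(t1, t0)));
 */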
2449  
2450  struct intel_context *
2451  intel_engine_create_virtual(struct intel_engine_cs **siblings,
2452  			    unsigned int count, unsigned long flags)
2453  {
2454  	if (count == 0)
2455  		return ERR_PTR(-EINVAL);
2456  
2457  	if (count == 1 && !(flags & FORCE_VIRTUAL))
2458  		return intel_context_create(siblings[0]);
2459  
2460  	GEM_BUG_ON(!siblings[0]->cops->create_virtual);
2461  	return siblings[0]->cops->create_virtual(siblings, count, flags);
2462  }
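
/*
 * Illustrative call (the siblings chosen here are hypothetical): build a
 * virtual engine that load-balances across two video decode engines.
 *
 *	struct intel_engine_cs *siblings[] = {
 *		gt->engine_class[VIDEO_DECODE_CLASS][0],
 *		gt->engine_class[VIDEO_DECODE_CLASS][1],
 *	};
 *	struct intel_context *ce;
 *
 *	ce = intel_engine_create_virtual(siblings, ARRAY_SIZE(siblings), 0);
 *	if (IS_ERR(ce))
 *		return PTR_ERR(ce);
 */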
2463  
2464  static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
2465  {
2466  	struct i915_request *request, *active = NULL;
2467  
2468  	/*
2469  	 * This search does not work in GuC submission mode. However, the GuC
2470  	 * will report the hanging context directly to the driver itself. So
2471  	 * the driver should never get here when in GuC mode.
2472  	 */
2473  	GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc));
2474  
2475  	/*
2476  	 * We are called by the error capture, by reset and to dump engine
2477  	 * state at random points in time. In particular, note that none of these
2478  	 * is crucially ordered with an interrupt. After a hang, the GPU is dead
2479  	 * and we assume that no more writes can happen (we waited long enough
2480  	 * for all writes that were in transaction to be flushed) - adding an
2481  	 * extra delay for a recent interrupt is pointless. Hence, we do
2482  	 * not need an engine->irq_seqno_barrier() before the seqno reads.
2483  	 * At all other times, we must assume the GPU is still running, but
2484  	 * we only care about the snapshot of this moment.
2485  	 */
2486  	lockdep_assert_held(&engine->sched_engine->lock);
2487  
2488  	rcu_read_lock();
2489  	request = execlists_active(&engine->execlists);
2490  	if (request) {
2491  		struct intel_timeline *tl = request->context->timeline;
2492  
2493  		list_for_each_entry_from_reverse(request, &tl->requests, link) {
2494  			if (__i915_request_is_complete(request))
2495  				break;
2496  
2497  			active = request;
2498  		}
2499  	}
2500  	rcu_read_unlock();
2501  	if (active)
2502  		return active;
2503  
2504  	list_for_each_entry(request, &engine->sched_engine->requests,
2505  			    sched.link) {
2506  		if (i915_test_request_state(request) != I915_REQUEST_ACTIVE)
2507  			continue;
2508  
2509  		active = request;
2510  		break;
2511  	}
2512  
2513  	return active;
2514  }
2515  
2516  void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
2517  				  struct intel_context **ce, struct i915_request **rq)
2518  {
2519  	unsigned long flags;
2520  
2521  	*ce = intel_engine_get_hung_context(engine);
2522  	if (*ce) {
2523  		intel_engine_clear_hung_context(engine);
2524  
2525  		*rq = intel_context_get_active_request(*ce);
2526  		return;
2527  	}
2528  
2529  	/*
2530  	 * Getting here with GuC enabled means it is a forced error capture
2531  	 * with no actual hang. So, no need to attempt the execlist search.
2532  	 */
2533  	if (intel_uc_uses_guc_submission(&engine->gt->uc))
2534  		return;
2535  
2536  	spin_lock_irqsave(&engine->sched_engine->lock, flags);
2537  	*rq = engine_execlist_find_hung_request(engine);
2538  	if (*rq)
2539  		*rq = i915_request_get_rcu(*rq);
2540  	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
2541  }
2542  
2543  void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
2544  {
2545  	/*
2546  	 * If there are any non-fused-off CCS engines, we need to enable CCS
2547  	 * support in the RCU_MODE register.  This only needs to be done once,
2548  	 * so for simplicity we'll take care of this in the RCS engine's
2549  	 * resume handler; since the RCS and all CCS engines belong to the
2550  	 * same reset domain and are reset together, this will also take care
2551  	 * of re-applying the setting after i915-triggered resets.
2552  	 */
2553  	if (!CCS_MASK(engine->gt))
2554  		return;
2555  
2556  	intel_uncore_write(engine->uncore, GEN12_RCU_MODE,
2557  			   _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
2558  }
2559  
2560  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2561  #include "mock_engine.c"
2562  #include "selftest_engine.c"
2563  #include "selftest_engine_cs.c"
2564  #endif
2565