// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};
50 
51 static const struct engine_info engine_infos[] = {
52 	[XE_HW_ENGINE_RCS0] = {
53 		.name = "rcs0",
54 		.class = XE_ENGINE_CLASS_RENDER,
55 		.instance = 0,
56 		.irq_offset = ilog2(INTR_RCS0),
57 		.domain = XE_FW_RENDER,
58 		.mmio_base = RENDER_RING_BASE,
59 	},
60 	[XE_HW_ENGINE_BCS0] = {
61 		.name = "bcs0",
62 		.class = XE_ENGINE_CLASS_COPY,
63 		.instance = 0,
64 		.irq_offset = ilog2(INTR_BCS(0)),
65 		.domain = XE_FW_RENDER,
66 		.mmio_base = BLT_RING_BASE,
67 	},
68 	[XE_HW_ENGINE_BCS1] = {
69 		.name = "bcs1",
70 		.class = XE_ENGINE_CLASS_COPY,
71 		.instance = 1,
72 		.irq_offset = ilog2(INTR_BCS(1)),
73 		.domain = XE_FW_RENDER,
74 		.mmio_base = XEHPC_BCS1_RING_BASE,
75 	},
76 	[XE_HW_ENGINE_BCS2] = {
77 		.name = "bcs2",
78 		.class = XE_ENGINE_CLASS_COPY,
79 		.instance = 2,
80 		.irq_offset = ilog2(INTR_BCS(2)),
81 		.domain = XE_FW_RENDER,
82 		.mmio_base = XEHPC_BCS2_RING_BASE,
83 	},
84 	[XE_HW_ENGINE_BCS3] = {
85 		.name = "bcs3",
86 		.class = XE_ENGINE_CLASS_COPY,
87 		.instance = 3,
88 		.irq_offset = ilog2(INTR_BCS(3)),
89 		.domain = XE_FW_RENDER,
90 		.mmio_base = XEHPC_BCS3_RING_BASE,
91 	},
92 	[XE_HW_ENGINE_BCS4] = {
93 		.name = "bcs4",
94 		.class = XE_ENGINE_CLASS_COPY,
95 		.instance = 4,
96 		.irq_offset = ilog2(INTR_BCS(4)),
97 		.domain = XE_FW_RENDER,
98 		.mmio_base = XEHPC_BCS4_RING_BASE,
99 	},
100 	[XE_HW_ENGINE_BCS5] = {
101 		.name = "bcs5",
102 		.class = XE_ENGINE_CLASS_COPY,
103 		.instance = 5,
104 		.irq_offset = ilog2(INTR_BCS(5)),
105 		.domain = XE_FW_RENDER,
106 		.mmio_base = XEHPC_BCS5_RING_BASE,
107 	},
108 	[XE_HW_ENGINE_BCS6] = {
109 		.name = "bcs6",
110 		.class = XE_ENGINE_CLASS_COPY,
111 		.instance = 6,
112 		.irq_offset = ilog2(INTR_BCS(6)),
113 		.domain = XE_FW_RENDER,
114 		.mmio_base = XEHPC_BCS6_RING_BASE,
115 	},
116 	[XE_HW_ENGINE_BCS7] = {
117 		.name = "bcs7",
118 		.class = XE_ENGINE_CLASS_COPY,
119 		.irq_offset = ilog2(INTR_BCS(7)),
120 		.instance = 7,
121 		.domain = XE_FW_RENDER,
122 		.mmio_base = XEHPC_BCS7_RING_BASE,
123 	},
124 	[XE_HW_ENGINE_BCS8] = {
125 		.name = "bcs8",
126 		.class = XE_ENGINE_CLASS_COPY,
127 		.instance = 8,
128 		.irq_offset = ilog2(INTR_BCS8),
129 		.domain = XE_FW_RENDER,
130 		.mmio_base = XEHPC_BCS8_RING_BASE,
131 	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function writes @val into an engine-specific register.
 * Forcewake must be held by the caller.
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function reads from an engine-specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}

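/**
 * xe_hw_engine_enable_ring() - Enable submission on a hardware engine
 * @hwe: engine to enable
 *
 * For compute engines this also enables the CCS units via RCU_MODE. It then
 * un-masks HWSTAM, points RING_HWS_PGA at the engine's hardware status page,
 * disables the legacy ring-buffer mode and clears STOP_RING, finishing with a
 * posting read of RING_MI_MODE.
 */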
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL, so the
		 * BLIT_CCTL registers need to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
				 BLIT_CCTL_DST_MOCS_MASK |
				 BLIT_CCTL_SRC_MOCS_MASK,
				 blit_cctl_val,
				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

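/*
 * Apply the engine's save/restore and whitelist programming, allocate its
 * hardware status page and, when GuC submission is not in use, create an
 * execlist port for direct submission. Teardown is devm-managed via
 * hw_engine_fini().
 */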
static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

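/*
 * Assign a compact, per-class logical numbering to the engines that survived
 * fusing, in engine-id order: e.g. if only bcs1 and bcs3 remain, they become
 * logical instances 0 and 1 of the copy class.
 */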
static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}

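/*
 * On graphics versions 12.60-12.69 (Xe_HPC) the additional copy engines are
 * fused in pairs via the MEML3 fuse: each enabled bit in MEML3_EN_MASK keeps
 * two consecutive BCS instances, hence the BIT(j / 2) test below.
 */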
static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

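/*
 * On newer platforms CCS presence is reported directly by the CCS_EN field of
 * XEHP_FUSE4 rather than being derived from the DSS topology.
 */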
static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);

		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
	}
}

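/**
 * xe_hw_engines_init_early() - Early initialization of the GT's hardware engines
 * @gt: GT structure
 *
 * Reads the fuse registers to trim gt->info.engine_mask and sets up the
 * static per-engine state (MMIO base, IRQ offset, scheduling defaults,
 * save/restore and whitelist tables) for every engine that remains.
 *
 * Return: 0 on success.
 */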
int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

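/**
 * xe_hw_engines_init() - Initialize the GT's hardware engines
 * @gt: GT structure
 *
 * Performs the full initialization of every enabled engine and then builds
 * the logical instance mapping and the hardware engine groups.
 *
 * Return: 0 on success, negative error code on failure.
 */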
int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

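/**
 * xe_hw_engine_handle_irq() - Handle an interrupt for a hardware engine
 * @hwe: engine that raised the interrupt
 * @intr_vec: engine interrupt vector
 *
 * Wakes user-fence waiters, forwards the vector to the engine's own IRQ
 * handler (if any) and runs the fence IRQ processing on user interrupts.
 */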
void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

static bool
is_slice_common_per_gslice(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) >= 1255;
}

static void
xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
				       struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int dss;
	u16 group, instance;

	snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		snapshot->reg.instdone.slice_common[0] =
			xe_mmio_read32(gt, SC_INSTDONE);
		snapshot->reg.instdone.slice_common_extra[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
		snapshot->reg.instdone.slice_common_extra2[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			snapshot->reg.instdone.slice_common[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
			snapshot->reg.instdone.slice_common_extra[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
			snapshot->reg.instdone.slice_common_extra2[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		snapshot->reg.instdone.sampler[dss] =
			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
		snapshot->reg.instdone.row[dss] =
			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);

		if (GRAPHICS_VERx100(xe) >= 1255)
			snapshot->reg.instdone.geom_svg[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
						       group, instance);
	}
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * The snapshot can be printed out at a later stage, e.g. during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	size_t len;
	u64 val;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	/*
	 * XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h, which in turn
	 * includes xe_hw_engine_types.h, so the length of these registers
	 * can't be expressed statically in struct xe_hw_engine_snapshot;
	 * allocate them here instead.
	 */
	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
	if (!snapshot->reg.instdone.slice_common ||
	    !snapshot->reg.instdone.slice_common_extra ||
	    !snapshot->reg.instdone.slice_common_extra2 ||
	    !snapshot->reg.instdone.sampler ||
	    !snapshot->reg.instdone.row ||
	    !snapshot->reg.instdone.geom_svg) {
		xe_hw_engine_snapshot_free(snapshot);
		return NULL;
	}

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	snapshot->reg.ring_execlist_status =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_status |= val << 32;

	snapshot->reg.ring_execlist_sq_contents =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_execlist_sq_contents |= val << 32;

	snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd |= val << 32;

	snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr |= val << 32;

	snapshot->reg.ring_dma_fadd =
		xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd |= val << 32;

	snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
	snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
		val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
		snapshot->reg.ring_start |= val << 32;
	}
	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
		snapshot->reg.indirect_ring_state =
			xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
	}

	snapshot->reg.ring_head =
		xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

static void
xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u16 group, instance;
	unsigned int dss;

	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra2[0]);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra2[dss]);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.sampler[dss]);
		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.row[dss]);

		if (GRAPHICS_VERx100(xe) >= 1255)
			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
				   dss, snapshot->reg.instdone.geom_svg[dss]);
	}
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_status);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_sq_contents);
	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
		   snapshot->reg.indirect_ring_state);
	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
	xe_hw_engine_snapshot_instdone_print(snapshot, p);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
	drm_puts(p, "\n");
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->reg.instdone.slice_common);
	kfree(snapshot->reg.instdone.slice_common_extra);
	kfree(snapshot->reg.instdone.slice_common_extra2);
	kfree(snapshot->reg.instdone.sampler);
	kfree(snapshot->reg.instdone.row);
	kfree(snapshot->reg.instdone.geom_svg);
	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Print the state of a Xe HW Engine.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

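/**
 * xe_hw_engine_mask_per_class() - Build a mask of engine instances of a class
 * @gt: GT structure
 * @engine_class: engine class to query
 *
 * Return: bitmask of the hardware instances of @engine_class that are present
 * in @gt's engine mask.
 */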
u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

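/**
 * xe_hw_engine_is_reserved() - Check if an engine is reserved for kernel use
 * @hwe: hardware engine
 *
 * Return: true if @hwe is the GSC engine, a compute engine disabled by the
 * current ccs_mode, or the copy engine reserved for USM; false otherwise.
 */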
bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

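/**
 * xe_hw_engine_class_to_str() - Engine class to user-facing name
 * @class: engine class
 *
 * Return: a short name for the class ("rcs", "bcs", "vcs", "vecs", "ccs" or
 * "other"), or NULL for an invalid class.
 */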
const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

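/**
 * xe_hw_engine_read_timestamp() - Read the engine timestamp
 * @hwe: hardware engine
 *
 * Return: the 64-bit RING_TIMESTAMP value, assembled from two 32-bit reads.
 */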
u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
}

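/**
 * xe_hw_engine_to_fw_domain() - Forcewake domain of a hardware engine
 * @hwe: hardware engine
 *
 * Return: the forcewake domain that must be held to access @hwe's registers.
 */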
enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function finds a hardware engine for the given engine
 * class and instance.
 *
 * Return: the xe_hw_engine pointer if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}