1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2014-2019 Intel Corporation
4  */
5 
6 #include <linux/bsearch.h>
7 
8 #include "gem/i915_gem_lmem.h"
9 #include "gt/intel_engine_regs.h"
10 #include "gt/intel_gt.h"
11 #include "gt/intel_gt_mcr.h"
12 #include "gt/intel_gt_regs.h"
13 #include "gt/intel_lrc.h"
14 #include "gt/shmem_utils.h"
15 #include "intel_guc_ads.h"
16 #include "intel_guc_capture.h"
17 #include "intel_guc_fwif.h"
18 #include "intel_guc_print.h"
19 #include "intel_uc.h"
20 #include "i915_drv.h"
21 
22 /*
23  * The Additional Data Struct (ADS) has pointers for different buffers used by
24  * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
25  * all the extra buffers indirectly linked via the ADS struct's entries.
26  *
27  * Layout of the ADS blob allocated for the GuC:
28  *
29  *      +---------------------------------------+ <== base
30  *      | guc_ads                               |
31  *      +---------------------------------------+
32  *      | guc_policies                          |
33  *      +---------------------------------------+
34  *      | guc_gt_system_info                    |
35  *      +---------------------------------------+
36  *      | guc_engine_usage                      |
37  *      +---------------------------------------+ <== static
38  *      | guc_mmio_reg[countA] (engine 0.0)     |
39  *      | guc_mmio_reg[countB] (engine 0.1)     |
40  *      | guc_mmio_reg[countC] (engine 1.0)     |
41  *      |   ...                                 |
42  *      +---------------------------------------+ <== dynamic
43  *      | padding                               |
44  *      +---------------------------------------+ <== 4K aligned
45  *      | golden contexts                       |
46  *      +---------------------------------------+
47  *      | padding                               |
48  *      +---------------------------------------+ <== 4K aligned
49  *      | w/a KLVs                              |
50  *      +---------------------------------------+
51  *      | padding                               |
52  *      +---------------------------------------+ <== 4K aligned
53  *      | capture lists                         |
54  *      +---------------------------------------+
55  *      | padding                               |
56  *      +---------------------------------------+ <== 4K aligned
57  *      | private data                          |
58  *      +---------------------------------------+
59  *      | padding                               |
60  *      +---------------------------------------+ <== 4K aligned
61  */
/*
 * Statically laid out head of the ADS blob. The members appear in the same
 * order as the first sections of the layout diagram; the struct is packed so
 * that offsetof() on it gives the byte offsets used within the blob.
 */
struct __guc_ads_blob {
	/* The ADS itself, at the base of the blob */
	struct guc_ads ads;
	/* Global scheduling policies, referenced via ads.scheduler_policies */
	struct guc_policies policies;
	/* GT system info, referenced via ads.gt_system_info */
	struct guc_gt_system_info system_info;
	/* Engine usage section (not accessed in this part of the file) */
	struct guc_engine_usage engine_usage;
	/* From here on, location is dynamic! Refer to above diagram. */
	struct guc_mmio_reg regset[];
} __packed;
70 
/*
 * Read one field of struct __guc_ads_blob through the ADS mapping of @guc_.
 * @field_ is a member path relative to the blob, e.g. policies.global_flags.
 */
#define ads_blob_read(guc_, field_)					\
	iosys_map_rd_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, field_)

/* Write @val_ to one field of struct __guc_ads_blob through the ADS mapping. */
#define ads_blob_write(guc_, field_, val_)				\
	iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob,	\
			   field_, val_)

/*
 * Write @val_ to a field of a struct guc_gt_system_info located at offset 0
 * of @map_. Callers in this file point @map_ either at the system_info
 * section of the ADS blob or at an on-stack copy.
 */
#define info_map_write(map_, field_, val_) \
	iosys_map_wr_field(map_, 0, struct guc_gt_system_info, field_, val_)

/* Read a field of the struct guc_gt_system_info located at offset 0 of @map_. */
#define info_map_read(map_, field_) \
	iosys_map_rd_field(map_, 0, struct guc_gt_system_info, field_)
83 
guc_ads_regset_size(struct intel_guc * guc)84 static u32 guc_ads_regset_size(struct intel_guc *guc)
85 {
86 	GEM_BUG_ON(!guc->ads_regset_size);
87 	return guc->ads_regset_size;
88 }
89 
guc_ads_golden_ctxt_size(struct intel_guc * guc)90 static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
91 {
92 	return PAGE_ALIGN(guc->ads_golden_ctxt_size);
93 }
94 
guc_ads_waklv_size(struct intel_guc * guc)95 static u32 guc_ads_waklv_size(struct intel_guc *guc)
96 {
97 	return PAGE_ALIGN(guc->ads_waklv_size);
98 }
99 
guc_ads_capture_size(struct intel_guc * guc)100 static u32 guc_ads_capture_size(struct intel_guc *guc)
101 {
102 	return PAGE_ALIGN(guc->ads_capture_size);
103 }
104 
guc_ads_private_data_size(struct intel_guc * guc)105 static u32 guc_ads_private_data_size(struct intel_guc *guc)
106 {
107 	return PAGE_ALIGN(guc->fw.private_data_size);
108 }
109 
guc_ads_regset_offset(struct intel_guc * guc)110 static u32 guc_ads_regset_offset(struct intel_guc *guc)
111 {
112 	return offsetof(struct __guc_ads_blob, regset);
113 }
114 
guc_ads_golden_ctxt_offset(struct intel_guc * guc)115 static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
116 {
117 	u32 offset;
118 
119 	offset = guc_ads_regset_offset(guc) +
120 		 guc_ads_regset_size(guc);
121 
122 	return PAGE_ALIGN(offset);
123 }
124 
guc_ads_waklv_offset(struct intel_guc * guc)125 static u32 guc_ads_waklv_offset(struct intel_guc *guc)
126 {
127 	u32 offset;
128 
129 	offset = guc_ads_golden_ctxt_offset(guc) +
130 		 guc_ads_golden_ctxt_size(guc);
131 
132 	return PAGE_ALIGN(offset);
133 }
134 
guc_ads_capture_offset(struct intel_guc * guc)135 static u32 guc_ads_capture_offset(struct intel_guc *guc)
136 {
137 	u32 offset;
138 
139 	offset = guc_ads_waklv_offset(guc) +
140 		 guc_ads_waklv_size(guc);
141 
142 	return PAGE_ALIGN(offset);
143 }
144 
guc_ads_private_data_offset(struct intel_guc * guc)145 static u32 guc_ads_private_data_offset(struct intel_guc *guc)
146 {
147 	u32 offset;
148 
149 	offset = guc_ads_capture_offset(guc) +
150 		 guc_ads_capture_size(guc);
151 
152 	return PAGE_ALIGN(offset);
153 }
154 
guc_ads_blob_size(struct intel_guc * guc)155 static u32 guc_ads_blob_size(struct intel_guc *guc)
156 {
157 	return guc_ads_private_data_offset(guc) +
158 	       guc_ads_private_data_size(guc);
159 }
160 
guc_policies_init(struct intel_guc * guc)161 static void guc_policies_init(struct intel_guc *guc)
162 {
163 	struct intel_gt *gt = guc_to_gt(guc);
164 	struct drm_i915_private *i915 = gt->i915;
165 	u32 global_flags = 0;
166 
167 	ads_blob_write(guc, policies.dpc_promote_time,
168 		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
169 	ads_blob_write(guc, policies.max_num_work_items,
170 		       GLOBAL_POLICY_MAX_NUM_WI);
171 
172 	if (i915->params.reset < 2)
173 		global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
174 
175 	ads_blob_write(guc, policies.global_flags, global_flags);
176 	ads_blob_write(guc, policies.is_valid, 1);
177 }
178 
intel_guc_ads_print_policy_info(struct intel_guc * guc,struct drm_printer * dp)179 void intel_guc_ads_print_policy_info(struct intel_guc *guc,
180 				     struct drm_printer *dp)
181 {
182 	if (unlikely(iosys_map_is_null(&guc->ads_map)))
183 		return;
184 
185 	drm_printf(dp, "Global scheduling policies:\n");
186 	drm_printf(dp, "  DPC promote time   = %u\n",
187 		   ads_blob_read(guc, policies.dpc_promote_time));
188 	drm_printf(dp, "  Max num work items = %u\n",
189 		   ads_blob_read(guc, policies.max_num_work_items));
190 	drm_printf(dp, "  Flags              = %u\n",
191 		   ads_blob_read(guc, policies.global_flags));
192 }
193 
/*
 * Send the GLOBAL_SCHED_POLICY_CHANGE action to the GuC, passing
 * @policy_offset as its only argument.
 */
static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
{
	u32 request[2];

	request[0] = INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE;
	request[1] = policy_offset;

	return intel_guc_send_busy_loop(guc, request, ARRAY_SIZE(request),
					0, true);
}
203 
intel_guc_global_policies_update(struct intel_guc * guc)204 int intel_guc_global_policies_update(struct intel_guc *guc)
205 {
206 	struct intel_gt *gt = guc_to_gt(guc);
207 	u32 scheduler_policies;
208 	intel_wakeref_t wakeref;
209 	int ret;
210 
211 	if (iosys_map_is_null(&guc->ads_map))
212 		return -EOPNOTSUPP;
213 
214 	scheduler_policies = ads_blob_read(guc, ads.scheduler_policies);
215 	GEM_BUG_ON(!scheduler_policies);
216 
217 	guc_policies_init(guc);
218 
219 	if (!intel_guc_is_ready(guc))
220 		return 0;
221 
222 	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
223 		ret = guc_action_policies_update(guc, scheduler_policies);
224 
225 	return ret;
226 }
227 
guc_mapping_table_init(struct intel_gt * gt,struct iosys_map * info_map)228 static void guc_mapping_table_init(struct intel_gt *gt,
229 				   struct iosys_map *info_map)
230 {
231 	unsigned int i, j;
232 	struct intel_engine_cs *engine;
233 	enum intel_engine_id id;
234 
235 	/* Table must be set to invalid values for entries not used */
236 	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
237 		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
238 			info_map_write(info_map, mapping_table[i][j],
239 				       GUC_MAX_INSTANCES_PER_CLASS);
240 
241 	for_each_engine(engine, gt, id) {
242 		u8 guc_class = engine_class_to_guc_class(engine->class);
243 
244 		info_map_write(info_map, mapping_table[guc_class][ilog2(engine->logical_mask)],
245 			       engine->instance);
246 	}
247 }
248 
/*
 * The save/restore register list must be pre-calculated to a temporary
 * buffer before it can be copied inside the ADS.
 *
 * All engines share one growable array (@storage); each engine's registers
 * form a contiguous run inside it, starting at @registers.
 */
struct temp_regset {
	/*
	 * ptr to the section of the storage for the engine currently being
	 * worked on
	 */
	struct guc_mmio_reg *registers;
	/* ptr to the base of the allocated storage for all engines */
	struct guc_mmio_reg *storage;
	/* number of entries of @storage in use, across all engines */
	u32 storage_used;
	/* number of entries @storage has room for before it must grow */
	u32 storage_max;
};
264 
guc_mmio_reg_cmp(const void * a,const void * b)265 static int guc_mmio_reg_cmp(const void *a, const void *b)
266 {
267 	const struct guc_mmio_reg *ra = a;
268 	const struct guc_mmio_reg *rb = b;
269 
270 	return (int)ra->offset - (int)rb->offset;
271 }
272 
273 static struct guc_mmio_reg * __must_check
__mmio_reg_add(struct temp_regset * regset,struct guc_mmio_reg * reg)274 __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
275 {
276 	u32 pos = regset->storage_used;
277 	struct guc_mmio_reg *slot;
278 
279 	if (pos >= regset->storage_max) {
280 		size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE);
281 		struct guc_mmio_reg *r = krealloc(regset->storage,
282 						  size, GFP_KERNEL);
283 		if (!r) {
284 			WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n",
285 				  -ENOMEM);
286 			return ERR_PTR(-ENOMEM);
287 		}
288 
289 		regset->registers = r + (regset->registers - regset->storage);
290 		regset->storage = r;
291 		regset->storage_max = size / sizeof(*slot);
292 	}
293 
294 	slot = &regset->storage[pos];
295 	regset->storage_used++;
296 	*slot = *reg;
297 
298 	return slot;
299 }
300 
guc_mmio_reg_add(struct intel_gt * gt,struct temp_regset * regset,u32 offset,u32 flags)301 static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
302 					  struct temp_regset *regset,
303 					  u32 offset, u32 flags)
304 {
305 	u32 count = regset->storage_used - (regset->registers - regset->storage);
306 	struct guc_mmio_reg entry = {
307 		.offset = offset,
308 		.flags = flags,
309 	};
310 	struct guc_mmio_reg *slot;
311 
312 	/*
313 	 * The mmio list is built using separate lists within the driver.
314 	 * It's possible that at some point we may attempt to add the same
315 	 * register more than once. Do not consider this an error; silently
316 	 * move on if the register is already in the list.
317 	 */
318 	if (bsearch(&entry, regset->registers, count,
319 		    sizeof(entry), guc_mmio_reg_cmp))
320 		return 0;
321 
322 	slot = __mmio_reg_add(regset, &entry);
323 	if (IS_ERR(slot))
324 		return PTR_ERR(slot);
325 
326 	while (slot-- > regset->registers) {
327 		GEM_BUG_ON(slot[0].offset == slot[1].offset);
328 		if (slot[1].offset > slot[0].offset)
329 			break;
330 
331 		swap(slot[1], slot[0]);
332 	}
333 
334 	return 0;
335 }
336 
/*
 * Add a (non-MCR) register to @regset by its mmio offset; @masked selects
 * whether the GUC_REGSET_MASKED flag is set on the entry.
 */
#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
	guc_mmio_reg_add(gt, \
			 regset, \
			 i915_mmio_reg_offset(reg), \
			 (masked) ? GUC_REGSET_MASKED : 0)

/*
 * Regset entry flags requesting steering to the given @group / @instance:
 * both values are packed into their fields and GUC_REGSET_NEEDS_STEERING
 * is set.
 */
#define GUC_REGSET_STEERING(group, instance) ( \
	FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
	FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
	GUC_REGSET_NEEDS_STEERING \
)
348 
guc_mcr_reg_add(struct intel_gt * gt,struct temp_regset * regset,i915_mcr_reg_t reg,u32 flags)349 static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
350 					 struct temp_regset *regset,
351 					 i915_mcr_reg_t reg, u32 flags)
352 {
353 	u8 group, inst;
354 
355 	/*
356 	 * The GuC doesn't have a default steering, so we need to explicitly
357 	 * steer all registers that need steering. However, we do not keep track
358 	 * of all the steering ranges, only of those that have a chance of using
359 	 * a non-default steering from the i915 pov. Instead of adding such
360 	 * tracking, it is easier to just program the default steering for all
361 	 * regs that don't need a non-default one.
362 	 */
363 	intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
364 	flags |= GUC_REGSET_STEERING(group, inst);
365 
366 	return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
367 }
368 
/*
 * MCR counterpart of GUC_MMIO_REG_ADD(): add MCR register @reg to @regset
 * through guc_mcr_reg_add(); @masked selects the GUC_REGSET_MASKED flag.
 */
#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
	guc_mcr_reg_add(gt, \
			 regset, \
			 (reg), \
			 (masked) ? GUC_REGSET_MASKED : 0)
374 
guc_mmio_regset_init(struct temp_regset * regset,struct intel_engine_cs * engine)375 static int guc_mmio_regset_init(struct temp_regset *regset,
376 				struct intel_engine_cs *engine)
377 {
378 	struct intel_gt *gt = engine->gt;
379 	const u32 base = engine->mmio_base;
380 	struct i915_wa_list *wal = &engine->wa_list;
381 	struct i915_wa *wa;
382 	unsigned int i;
383 	int ret = 0;
384 
385 	/*
386 	 * Each engine's registers point to a new start relative to
387 	 * storage
388 	 */
389 	regset->registers = regset->storage + regset->storage_used;
390 
391 	ret |= GUC_MMIO_REG_ADD(gt, regset, RING_MODE_GEN7(base), true);
392 	ret |= GUC_MMIO_REG_ADD(gt, regset, RING_HWS_PGA(base), false);
393 	ret |= GUC_MMIO_REG_ADD(gt, regset, RING_IMR(base), false);
394 
395 	if ((engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) &&
396 	    CCS_MASK(engine->gt))
397 		ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
398 
399 	/*
400 	 * some of the WA registers are MCR registers. As it is safe to
401 	 * use MCR form for non-MCR registers, for code simplicity, all
402 	 * WA registers are added with MCR form.
403 	 */
404 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
405 		ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
406 
407 	/* Be extra paranoid and include all whitelist registers. */
408 	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
409 		ret |= GUC_MMIO_REG_ADD(gt, regset,
410 					RING_FORCE_TO_NONPRIV(base, i),
411 					false);
412 
413 	/* add in local MOCS registers */
414 	for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
415 		if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
416 			ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
417 		else
418 			ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
419 
420 	if (GRAPHICS_VER(engine->i915) >= 12) {
421 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
422 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
423 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
424 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
425 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
426 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
427 		ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
428 	}
429 
430 	return ret ? -1 : 0;
431 }
432 
guc_mmio_reg_state_create(struct intel_guc * guc)433 static long guc_mmio_reg_state_create(struct intel_guc *guc)
434 {
435 	struct intel_gt *gt = guc_to_gt(guc);
436 	struct intel_engine_cs *engine;
437 	enum intel_engine_id id;
438 	struct temp_regset temp_set = {};
439 	long total = 0;
440 	long ret;
441 
442 	for_each_engine(engine, gt, id) {
443 		u32 used = temp_set.storage_used;
444 
445 		ret = guc_mmio_regset_init(&temp_set, engine);
446 		if (ret < 0)
447 			goto fail_regset_init;
448 
449 		guc->ads_regset_count[id] = temp_set.storage_used - used;
450 		total += guc->ads_regset_count[id];
451 	}
452 
453 	guc->ads_regset = temp_set.storage;
454 
455 	guc_dbg(guc, "Used %zu KB for temporary ADS regset\n",
456 		(temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10);
457 
458 	return total * sizeof(struct guc_mmio_reg);
459 
460 fail_regset_init:
461 	kfree(temp_set.storage);
462 	return ret;
463 }
464 
guc_mmio_reg_state_init(struct intel_guc * guc)465 static void guc_mmio_reg_state_init(struct intel_guc *guc)
466 {
467 	struct intel_gt *gt = guc_to_gt(guc);
468 	struct intel_engine_cs *engine;
469 	enum intel_engine_id id;
470 	u32 addr_ggtt, offset;
471 
472 	offset = guc_ads_regset_offset(guc);
473 	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
474 
475 	iosys_map_memcpy_to(&guc->ads_map, offset, guc->ads_regset,
476 			    guc->ads_regset_size);
477 
478 	for_each_engine(engine, gt, id) {
479 		u32 count = guc->ads_regset_count[id];
480 		u8 guc_class;
481 
482 		/* Class index is checked in class converter */
483 		GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
484 
485 		guc_class = engine_class_to_guc_class(engine->class);
486 
487 		if (!count) {
488 			ads_blob_write(guc,
489 				       ads.reg_state_list[guc_class][engine->instance].address,
490 				       0);
491 			ads_blob_write(guc,
492 				       ads.reg_state_list[guc_class][engine->instance].count,
493 				       0);
494 			continue;
495 		}
496 
497 		ads_blob_write(guc,
498 			       ads.reg_state_list[guc_class][engine->instance].address,
499 			       addr_ggtt);
500 		ads_blob_write(guc,
501 			       ads.reg_state_list[guc_class][engine->instance].count,
502 			       count);
503 
504 		addr_ggtt += count * sizeof(struct guc_mmio_reg);
505 	}
506 }
507 
fill_engine_enable_masks(struct intel_gt * gt,struct iosys_map * info_map)508 static void fill_engine_enable_masks(struct intel_gt *gt,
509 				     struct iosys_map *info_map)
510 {
511 	info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], RCS_MASK(gt));
512 	info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], CCS_MASK(gt));
513 	info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], BCS_MASK(gt));
514 	info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], VDBOX_MASK(gt));
515 	info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], VEBOX_MASK(gt));
516 
517 	/* The GSC engine is an instance (6) of OTHER_CLASS */
518 	if (gt->engine[GSC0])
519 		info_map_write(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
520 			       BIT(gt->engine[GSC0]->instance));
521 }
522 
/*
 * LR HW context size in bytes: 80 dwords, or 96 dwords from graphics IP
 * version 12.55 onwards.
 */
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32))
#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55) ? \
				    XEHP_LR_HW_CONTEXT_SIZE : \
				    LR_HW_CONTEXT_SIZE)
/*
 * Bytes at the start of a context image (LRC_PPHWSP_SZ pages plus the LR HW
 * context) that are subtracted from the context size to obtain the engine
 * state size reported to the GuC.
 */
#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915))
guc_prep_golden_context(struct intel_guc * guc)529 static int guc_prep_golden_context(struct intel_guc *guc)
530 {
531 	struct intel_gt *gt = guc_to_gt(guc);
532 	u32 addr_ggtt, offset;
533 	u32 total_size = 0, alloc_size, real_size;
534 	u8 engine_class, guc_class;
535 	struct guc_gt_system_info local_info;
536 	struct iosys_map info_map;
537 
538 	/*
539 	 * Reserve the memory for the golden contexts and point GuC at it but
540 	 * leave it empty for now. The context data will be filled in later
541 	 * once there is something available to put there.
542 	 *
543 	 * Note that the HWSP and ring context are not included.
544 	 *
545 	 * Note also that the storage must be pinned in the GGTT, so that the
546 	 * address won't change after GuC has been told where to find it. The
547 	 * GuC will also validate that the LRC base + size fall within the
548 	 * allowed GGTT range.
549 	 */
550 	if (!iosys_map_is_null(&guc->ads_map)) {
551 		offset = guc_ads_golden_ctxt_offset(guc);
552 		addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
553 		info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
554 						 offsetof(struct __guc_ads_blob, system_info));
555 	} else {
556 		memset(&local_info, 0, sizeof(local_info));
557 		iosys_map_set_vaddr(&info_map, &local_info);
558 		fill_engine_enable_masks(gt, &info_map);
559 	}
560 
561 	for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
562 		guc_class = engine_class_to_guc_class(engine_class);
563 
564 		if (!info_map_read(&info_map, engine_enabled_masks[guc_class]))
565 			continue;
566 
567 		real_size = intel_engine_context_size(gt, engine_class);
568 		alloc_size = PAGE_ALIGN(real_size);
569 		total_size += alloc_size;
570 
571 		if (iosys_map_is_null(&guc->ads_map))
572 			continue;
573 
574 		/*
575 		 * This interface is slightly confusing. We need to pass the
576 		 * base address of the full golden context and the size of just
577 		 * the engine state, which is the section of the context image
578 		 * that starts after the execlists context. This is required to
579 		 * allow the GuC to restore just the engine state when a
580 		 * watchdog reset occurs.
581 		 * We calculate the engine state size by removing the size of
582 		 * what comes before it in the context image (which is identical
583 		 * on all engines).
584 		 */
585 		ads_blob_write(guc, ads.eng_state_size[guc_class],
586 			       real_size - LRC_SKIP_SIZE(gt->i915));
587 		ads_blob_write(guc, ads.golden_context_lrca[guc_class],
588 			       addr_ggtt);
589 
590 		addr_ggtt += alloc_size;
591 	}
592 
593 	/* Make sure current size matches what we calculated previously */
594 	if (guc->ads_golden_ctxt_size)
595 		GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
596 
597 	return total_size;
598 }
599 
/*
 * Return the first engine of @gt that belongs to @engine_class and has a
 * default_state recorded, or NULL if there is none.
 */
static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class)
{
	struct intel_engine_cs *candidate;
	enum intel_engine_id id;

	for_each_engine(candidate, gt, id) {
		if (candidate->class == engine_class && candidate->default_state)
			return candidate;
	}

	return NULL;
}
617 
guc_init_golden_context(struct intel_guc * guc)618 static void guc_init_golden_context(struct intel_guc *guc)
619 {
620 	struct intel_engine_cs *engine;
621 	struct intel_gt *gt = guc_to_gt(guc);
622 	unsigned long offset;
623 	u32 addr_ggtt, total_size = 0, alloc_size, real_size;
624 	u8 engine_class, guc_class;
625 
626 	if (!intel_uc_uses_guc_submission(&gt->uc))
627 		return;
628 
629 	GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
630 
631 	/*
632 	 * Go back and fill in the golden context data now that it is
633 	 * available.
634 	 */
635 	offset = guc_ads_golden_ctxt_offset(guc);
636 	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
637 
638 	for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
639 		guc_class = engine_class_to_guc_class(engine_class);
640 		if (!ads_blob_read(guc, system_info.engine_enabled_masks[guc_class]))
641 			continue;
642 
643 		real_size = intel_engine_context_size(gt, engine_class);
644 		alloc_size = PAGE_ALIGN(real_size);
645 		total_size += alloc_size;
646 
647 		engine = find_engine_state(gt, engine_class);
648 		if (!engine) {
649 			guc_err(guc, "No engine state recorded for class %d!\n",
650 				engine_class);
651 			ads_blob_write(guc, ads.eng_state_size[guc_class], 0);
652 			ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0);
653 			continue;
654 		}
655 
656 		GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
657 			   real_size - LRC_SKIP_SIZE(gt->i915));
658 		GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
659 
660 		addr_ggtt += alloc_size;
661 
662 		shmem_read_to_iosys_map(engine->default_state, 0, &guc->ads_map,
663 					offset, real_size);
664 		offset += alloc_size;
665 	}
666 
667 	GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
668 }
669 
/*
 * Return the mask of enabled engines, read from the system info at
 * @info_map, that belong to capture list class @capture_class. The
 * render/compute capture class combines the render and compute masks.
 * Unknown classes yield 0.
 */
static u32 guc_get_capture_engine_mask(struct iosys_map *info_map, u32 capture_class)
{
	switch (capture_class) {
	case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
		return info_map_read(info_map, engine_enabled_masks[GUC_RENDER_CLASS]) |
		       info_map_read(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);

	case GUC_CAPTURE_LIST_CLASS_VIDEO:
		return info_map_read(info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);

	case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
		return info_map_read(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);

	case GUC_CAPTURE_LIST_CLASS_BLITTER:
		return info_map_read(info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);

	case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
		return info_map_read(info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);

	default:
		return 0;
	}
}
702 
/*
 * Compute the size of the error-capture list section and, when the ADS blob
 * is mapped, copy the lists into it and program their addresses in the ADS.
 *
 * The section starts with one page holding a "null" list; every ADS capture
 * pointer whose list is missing, cannot be fetched or does not fit in the
 * previously computed size (guc->ads_capture_size) is pointed at that page.
 *
 * When the blob is not mapped, only the size is computed, using a local
 * system info copy to determine which engines are enabled.
 *
 * Returns the page-aligned total size in bytes.
 */
static int
guc_capture_prep_lists(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	/* ads_ggtt, capture_offset and null_ggtt are only set/used when mapped */
	u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
	struct guc_gt_system_info local_info;
	struct iosys_map info_map;
	bool ads_is_mapped;
	size_t size = 0;
	void *ptr;
	int i, j;

	ads_is_mapped = !iosys_map_is_null(&guc->ads_map);
	if (ads_is_mapped) {
		capture_offset = guc_ads_capture_offset(guc);
		ads_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma);
		info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
						 offsetof(struct __guc_ads_blob, system_info));
	} else {
		memset(&local_info, 0, sizeof(local_info));
		iosys_map_set_vaddr(&info_map, &local_info);
		fill_engine_enable_masks(gt, &info_map);
	}

	/* first, set aside the first page for a capture_list with zero descriptors */
	total_size = PAGE_SIZE;
	if (ads_is_mapped) {
		if (!intel_guc_capture_getnullheader(guc, &ptr, &size))
			iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
		null_ggtt = ads_ggtt + capture_offset;
		capture_offset += PAGE_SIZE;
	}

	/* i: capture list index, j: capture engine class */
	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
			u32 engine_mask = guc_get_capture_engine_mask(&info_map, j);

			/* null list if we dont have said engine or list */
			if (!engine_mask) {
				if (ads_is_mapped) {
					ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
					ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
				}
				continue;
			}
			/* Engine-class list; on failure still try the instance list */
			if (intel_guc_capture_getlistsize(guc, i,
							  GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
							  j, &size)) {
				if (ads_is_mapped)
					ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
				goto engine_instance_list;
			}
			total_size += size;
			if (ads_is_mapped) {
				/*
				 * Use the null list if this list would not fit or
				 * cannot be fetched. Note that this also skips the
				 * instance list for this class.
				 */
				if (total_size > guc->ads_capture_size ||
				    intel_guc_capture_getlist(guc, i,
							      GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
							      j, &ptr)) {
					ads_blob_write(guc, ads.capture_class[i][j], null_ggtt);
					continue;
				}
				ads_blob_write(guc, ads.capture_class[i][j], ads_ggtt +
					       capture_offset);
				iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
				capture_offset += size;
			}
engine_instance_list:
			/* Engine-instance list for the same index/class */
			if (intel_guc_capture_getlistsize(guc, i,
							  GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
							  j, &size)) {
				if (ads_is_mapped)
					ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
				continue;
			}
			total_size += size;
			if (ads_is_mapped) {
				if (total_size > guc->ads_capture_size ||
				    intel_guc_capture_getlist(guc, i,
							      GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
							      j, &ptr)) {
					ads_blob_write(guc, ads.capture_instance[i][j], null_ggtt);
					continue;
				}
				ads_blob_write(guc, ads.capture_instance[i][j], ads_ggtt +
					       capture_offset);
				iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
				capture_offset += size;
			}
		}
		/* Global list for this index */
		if (intel_guc_capture_getlistsize(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &size)) {
			if (ads_is_mapped)
				ads_blob_write(guc, ads.capture_global[i], null_ggtt);
			continue;
		}
		total_size += size;
		if (ads_is_mapped) {
			if (total_size > guc->ads_capture_size ||
			    intel_guc_capture_getlist(guc, i, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0,
						      &ptr)) {
				ads_blob_write(guc, ads.capture_global[i], null_ggtt);
				continue;
			}
			ads_blob_write(guc, ads.capture_global[i], ads_ggtt + capture_offset);
			iosys_map_memcpy_to(&guc->ads_map, capture_offset, ptr, size);
			capture_offset += size;
		}
	}

	/* Only warn when a previously computed size exists and differs */
	if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size))
		guc_warn(guc, "ADS capture alloc size changed from %d to %d\n",
			 guc->ads_capture_size, PAGE_ALIGN(total_size));

	return PAGE_ALIGN(total_size);
}
817 
/*
 * Append a data-less KLV (key @klv_id, length 0) to the workaround KLV
 * section at *@offset in the ADS blob, then advance *@offset and reduce
 * *@remain by the single dword written.
 */
static void guc_waklv_enable_simple(struct intel_guc *guc, u32 *offset, u32 *remain, u32 klv_id)
{
	/* 16:16 key/length, followed by 0 dwords of data */
	const u32 klv_header = FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
			       FIELD_PREP(GUC_KLV_0_LEN, 0);
	const u32 klv_bytes = sizeof(klv_header);

	GEM_BUG_ON(*remain < klv_bytes);

	iosys_map_memcpy_to(&guc->ads_map, *offset, &klv_header, klv_bytes);
	*offset += klv_bytes;
	*remain -= klv_bytes;
}
835 
guc_waklv_init(struct intel_guc * guc)836 static void guc_waklv_init(struct intel_guc *guc)
837 {
838 	struct intel_gt *gt = guc_to_gt(guc);
839 	u32 offset, addr_ggtt, remain, size;
840 
841 	if (!intel_uc_uses_guc_submission(&gt->uc))
842 		return;
843 
844 	if (GUC_FIRMWARE_VER(guc) < MAKE_GUC_VER(70, 10, 0))
845 		return;
846 
847 	GEM_BUG_ON(iosys_map_is_null(&guc->ads_map));
848 	offset = guc_ads_waklv_offset(guc);
849 	remain = guc_ads_waklv_size(guc);
850 
851 	/* Wa_14019159160 */
852 	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
853 		guc_waklv_enable_simple(guc, &offset, &remain,
854 					GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE);
855 		guc_waklv_enable_simple(guc, &offset, &remain,
856 					GUC_WORKAROUND_KLV_AVOID_GFX_CLEAR_WHILE_ACTIVE);
857 	}
858 
859 	/* Wa_16021333562 */
860 	if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) &&
861 	    (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) ||
862 	     IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) ||
863 	     IS_DG2(gt->i915)))
864 		guc_waklv_enable_simple(guc, &offset, &remain,
865 					GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
866 
867 	size = guc_ads_waklv_size(guc) - remain;
868 	if (!size)
869 		return;
870 
871 	offset = guc_ads_waklv_offset(guc);
872 	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
873 
874 	ads_blob_write(guc, ads.wa_klv_addr_lo, addr_ggtt);
875 	ads_blob_write(guc, ads.wa_klv_addr_hi, 0);
876 	ads_blob_write(guc, ads.wa_klv_size, size);
877 }
878 
guc_prep_waklv(struct intel_guc * guc)879 static int guc_prep_waklv(struct intel_guc *guc)
880 {
881 	/* Fudge something chunky for now: */
882 	return PAGE_SIZE;
883 }
884 
__guc_ads_init(struct intel_guc * guc)885 static void __guc_ads_init(struct intel_guc *guc)
886 {
887 	struct intel_gt *gt = guc_to_gt(guc);
888 	struct drm_i915_private *i915 = gt->i915;
889 	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(&guc->ads_map,
890 			offsetof(struct __guc_ads_blob, system_info));
891 	u32 base;
892 
893 	/* GuC scheduling policies */
894 	guc_policies_init(guc);
895 
896 	/* System info */
897 	fill_engine_enable_masks(gt, &info_map);
898 
899 	ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
900 		       hweight8(gt->info.sseu.slice_mask));
901 	ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
902 		       gt->info.vdbox_sfc_access);
903 
904 	if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) {
905 		u32 distdbreg = intel_uncore_read(gt->uncore,
906 						  GEN12_DIST_DBS_POPULATED);
907 		ads_blob_write(guc,
908 			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
909 			       ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT)
910 				& GEN12_DOORBELLS_PER_SQIDI) + 1);
911 	}
912 
913 	/* Golden contexts for re-initialising after a watchdog reset */
914 	guc_prep_golden_context(guc);
915 
916 	guc_mapping_table_init(guc_to_gt(guc), &info_map);
917 
918 	base = intel_guc_ggtt_offset(guc, guc->ads_vma);
919 
920 	/* Lists for error capture debug */
921 	guc_capture_prep_lists(guc);
922 
923 	/* ADS */
924 	ads_blob_write(guc, ads.scheduler_policies, base +
925 		       offsetof(struct __guc_ads_blob, policies));
926 	ads_blob_write(guc, ads.gt_system_info, base +
927 		       offsetof(struct __guc_ads_blob, system_info));
928 
929 	/* MMIO save/restore list */
930 	guc_mmio_reg_state_init(guc);
931 
932 	/* Workaround KLV list */
933 	guc_waklv_init(guc);
934 
935 	/* Private Data */
936 	ads_blob_write(guc, ads.private_data, base +
937 		       guc_ads_private_data_offset(guc));
938 
939 	i915_gem_object_flush_map(guc->ads_vma->obj);
940 }
941 
942 /**
943  * intel_guc_ads_create() - allocates and initializes GuC ADS.
944  * @guc: intel_guc struct
945  *
946  * GuC needs memory block (Additional Data Struct), where it will store
947  * some data. Allocate and initialize such memory block for GuC use.
948  */
intel_guc_ads_create(struct intel_guc * guc)949 int intel_guc_ads_create(struct intel_guc *guc)
950 {
951 	void *ads_blob;
952 	u32 size;
953 	int ret;
954 
955 	GEM_BUG_ON(guc->ads_vma);
956 
957 	/*
958 	 * Create reg state size dynamically on system memory to be copied to
959 	 * the final ads blob on gt init/reset
960 	 */
961 	ret = guc_mmio_reg_state_create(guc);
962 	if (ret < 0)
963 		return ret;
964 	guc->ads_regset_size = ret;
965 
966 	/* Likewise the golden contexts: */
967 	ret = guc_prep_golden_context(guc);
968 	if (ret < 0)
969 		return ret;
970 	guc->ads_golden_ctxt_size = ret;
971 
972 	/* Likewise the capture lists: */
973 	ret = guc_capture_prep_lists(guc);
974 	if (ret < 0)
975 		return ret;
976 	guc->ads_capture_size = ret;
977 
978 	/* And don't forget the workaround KLVs: */
979 	ret = guc_prep_waklv(guc);
980 	if (ret < 0)
981 		return ret;
982 	guc->ads_waklv_size = ret;
983 
984 	/* Now the total size can be determined: */
985 	size = guc_ads_blob_size(guc);
986 
987 	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
988 					     &ads_blob);
989 	if (ret)
990 		return ret;
991 
992 	if (i915_gem_object_is_lmem(guc->ads_vma->obj))
993 		iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem *)ads_blob);
994 	else
995 		iosys_map_set_vaddr(&guc->ads_map, ads_blob);
996 
997 	__guc_ads_init(guc);
998 
999 	return 0;
1000 }
1001 
/**
 * intel_guc_ads_init_late() - deferred part of the ADS initialization
 * @guc: intel_guc struct
 *
 * Fills in the golden context state. This needs the saved engine state from
 * __engines_record_defaults(), which in turn needs operational engines and
 * therefore an already configured ADS. As the golden context state is not
 * needed until a hang occurs, it is safe to provide it only in this late
 * init phase.
 */
void intel_guc_ads_init_late(struct intel_guc *guc)
{
	guc_init_golden_context(guc);
}
1013 
intel_guc_ads_destroy(struct intel_guc * guc)1014 void intel_guc_ads_destroy(struct intel_guc *guc)
1015 {
1016 	i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
1017 	iosys_map_clear(&guc->ads_map);
1018 	kfree(guc->ads_regset);
1019 }
1020 
guc_ads_private_data_reset(struct intel_guc * guc)1021 static void guc_ads_private_data_reset(struct intel_guc *guc)
1022 {
1023 	u32 size;
1024 
1025 	size = guc_ads_private_data_size(guc);
1026 	if (!size)
1027 		return;
1028 
1029 	iosys_map_memset(&guc->ads_map, guc_ads_private_data_offset(guc),
1030 			 0, size);
1031 }
1032 
1033 /**
1034  * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse
1035  * @guc: intel_guc struct
1036  *
1037  * GuC stores some data in ADS, which might be stale after a reset.
1038  * Reinitialize whole ADS in case any part of it was corrupted during
1039  * previous GuC run.
1040  */
intel_guc_ads_reset(struct intel_guc * guc)1041 void intel_guc_ads_reset(struct intel_guc *guc)
1042 {
1043 	if (!guc->ads_vma)
1044 		return;
1045 
1046 	__guc_ads_init(guc);
1047 
1048 	guc_ads_private_data_reset(guc);
1049 }
1050 
intel_guc_engine_usage_offset(struct intel_guc * guc)1051 u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
1052 {
1053 	return intel_guc_ggtt_offset(guc, guc->ads_vma) +
1054 		offsetof(struct __guc_ads_blob, engine_usage);
1055 }
1056 
intel_guc_engine_usage_record_map(struct intel_engine_cs * engine)1057 struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs *engine)
1058 {
1059 	struct intel_guc *guc = gt_to_guc(engine->gt);
1060 	u8 guc_class = engine_class_to_guc_class(engine->class);
1061 	size_t offset = offsetof(struct __guc_ads_blob,
1062 				 engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
1063 
1064 	return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset);
1065 }
1066