1 /* SPDX-License-Identifier: GPL-2.0 or MIT */
2 /* Copyright 2023 Collabora ltd. */
3 
4 #ifndef __PANTHOR_MCU_H__
5 #define __PANTHOR_MCU_H__
6 
7 #include <linux/types.h>
8 
9 struct panthor_device;
10 struct panthor_kernel_bo;
11 
/*
 * Sizing constants for the firmware interface.
 *
 * NOTE(review): presumably the maximum number of command stream group (CSG)
 * slots, and the maximum number of command stream (CS) slots per group, that
 * the driver is prepared to handle - confirm against the users of these
 * constants.
 */
#define MAX_CSGS				31
#define MAX_CS_PER_CSG                          32
14 
/*
 * Ring buffer input interface.
 *
 * panthor_fw_alloc_queue_iface_mem() hands a pointer to this structure back
 * through its non-const 'input' argument.
 *
 * The field order and sizes are left untouched on purpose.
 * NOTE(review): presumably this layout is shared with the FW - confirm before
 * changing anything here.
 */
struct panthor_fw_ringbuf_input_iface {
	u64 insert;
	u64 extract;
};
19 
/*
 * Ring buffer output interface.
 *
 * panthor_fw_alloc_queue_iface_mem() hands a pointer to this structure back
 * through its const-qualified 'output' argument, so callers of that helper
 * only get read access to it.
 *
 * NOTE(review): presumably this layout is shared with the FW - confirm before
 * changing anything here.
 */
struct panthor_fw_ringbuf_output_iface {
	u64 extract;
	u32 active;
};
24 
/*
 * Command stream slot control interface.
 *
 * Reached through panthor_fw_cs_iface::control, which describes it as a
 * read-only interface exposing command stream slot properties.
 */
struct panthor_fw_cs_control_iface {
/*
 * Decoding helpers and flags.
 * NOTE(review): presumably meant to be applied to the 'features' field
 * declared right below - confirm.
 *
 * CS_FEATURES_WORK_REGS() takes bits 7:0 and adds one,
 * CS_FEATURES_SCOREBOARDS() extracts bits 15:8.
 */
#define CS_FEATURES_WORK_REGS(x)		(((x) & GENMASK(7, 0)) + 1)
#define CS_FEATURES_SCOREBOARDS(x)		(((x) & GENMASK(15, 8)) >> 8)
#define CS_FEATURES_COMPUTE			BIT(16)
#define CS_FEATURES_FRAGMENT			BIT(17)
#define CS_FEATURES_TILER			BIT(18)
	u32 features;
	u32 input_va;
	u32 output_va;
};
35 
/*
 * Command stream slot input interface.
 *
 * Reached through panthor_fw_cs_iface::input, which describes it as the
 * interface used for host updates/events.
 */
struct panthor_fw_cs_input_iface {
/*
 * Bits of the 'req' field declared below.
 *
 * CS_STATE_MASK selects bits 2:0; CS_STATE_STOP/CS_STATE_START are values of
 * that field.
 * CS_REQ_MASK groups the state field with bits 4 and 8 to 11.
 * CS_EVT_MASK groups bits 26, 27, 30 and 31.
 */
#define CS_STATE_MASK				GENMASK(2, 0)
#define CS_STATE_STOP				0
#define CS_STATE_START				1
#define CS_EXTRACT_EVENT			BIT(4)
#define CS_IDLE_SYNC_WAIT			BIT(8)
#define CS_IDLE_PROTM_PENDING			BIT(9)
#define CS_IDLE_EMPTY				BIT(10)
#define CS_IDLE_RESOURCE_REQ			BIT(11)
#define CS_TILER_OOM				BIT(26)
#define CS_PROTM_PENDING			BIT(27)
#define CS_FATAL				BIT(30)
#define CS_FAULT				BIT(31)
#define CS_REQ_MASK				(CS_STATE_MASK | \
						 CS_EXTRACT_EVENT | \
						 CS_IDLE_SYNC_WAIT | \
						 CS_IDLE_PROTM_PENDING | \
						 CS_IDLE_EMPTY | \
						 CS_IDLE_RESOURCE_REQ)
#define CS_EVT_MASK				(CS_TILER_OOM | \
						 CS_PROTM_PENDING | \
						 CS_FATAL | \
						 CS_FAULT)
	/* Access to this field is protected by panthor_fw_cs_iface::lock. */
	u32 req;

/*
 * Encoding helpers: CS_CONFIG_PRIORITY() keeps bits 3:0 of its argument,
 * CS_CONFIG_DOORBELL() shifts its argument into bits 15:8.
 * NOTE(review): presumably used to build the 'config' field below - confirm.
 */
#define CS_CONFIG_PRIORITY(x)			((x) & GENMASK(3, 0))
#define CS_CONFIG_DOORBELL(x)			(((x) << 8) & GENMASK(15, 8))
	u32 config;
	u32 reserved1;
	u32 ack_irq_mask;
	u64 ringbuf_base;
	u32 ringbuf_size;
	u32 reserved2;
	u64 heap_start;
	u64 heap_end;
	u64 ringbuf_input;
	u64 ringbuf_output;
	u32 instr_config;
	u32 instrbuf_size;
	u64 instrbuf_base;
	u64 instrbuf_offset_ptr;
};
78 
/*
 * Command stream slot output interface.
 *
 * Reached through panthor_fw_cs_iface::output, which describes it as the
 * read-only interface used for FW updates/events.
 *
 * NOTE(review): the reservedN members presumably keep the following fields at
 * fixed offsets - do not resize or remove them without checking the FW
 * interface layout.
 */
struct panthor_fw_cs_output_iface {
	u32 ack;
	u32 reserved1[15];
	u64 status_cmd_ptr;

/*
 * Bit definitions.
 * NOTE(review): presumably describing the 'status_wait' field below - confirm.
 *
 * CS_STATUS_WAIT_SB_MASK covers bits 15:0.
 * CS_STATUS_WAIT_SB_SRC_* are values inside CS_STATUS_WAIT_SB_SRC_MASK
 * (bits 19:16).
 * CS_STATUS_WAIT_SYNC_COND_* are values inside CS_STATUS_WAIT_SYNC_COND_MASK
 * (bits 27:24).
 */
#define CS_STATUS_WAIT_SB_MASK			GENMASK(15, 0)
#define CS_STATUS_WAIT_SB_SRC_MASK		GENMASK(19, 16)
#define CS_STATUS_WAIT_SB_SRC_NONE		(0 << 16)
#define CS_STATUS_WAIT_SB_SRC_WAIT		(8 << 16)
#define CS_STATUS_WAIT_SYNC_COND_LE		(0 << 24)
#define CS_STATUS_WAIT_SYNC_COND_GT		(1 << 24)
#define CS_STATUS_WAIT_SYNC_COND_MASK		GENMASK(27, 24)
#define CS_STATUS_WAIT_PROGRESS			BIT(28)
#define CS_STATUS_WAIT_PROTM			BIT(29)
#define CS_STATUS_WAIT_SYNC_64B			BIT(30)
#define CS_STATUS_WAIT_SYNC			BIT(31)
	u32 status_wait;
	u32 status_req_resource;
	u64 status_wait_sync_ptr;
	u32 status_wait_sync_value;
	u32 status_scoreboards;

/*
 * Values of the 4-bit field selected by CS_STATUS_BLOCKED_REASON_MASK.
 * Note that no name is defined here for value 4.
 * NOTE(review): presumably describing 'status_blocked_reason' below - confirm.
 */
#define CS_STATUS_BLOCKED_REASON_UNBLOCKED	0
#define CS_STATUS_BLOCKED_REASON_SB_WAIT	1
#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT	2
#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT	3
#define CS_STATUS_BLOCKED_REASON_DEFERRED	5
#define CS_STATUS_BLOCKED_REASON_RES		6
#define CS_STATUS_BLOCKED_REASON_FLUSH		7
#define CS_STATUS_BLOCKED_REASON_MASK		GENMASK(3, 0)
	u32 status_blocked_reason;
	u32 status_wait_sync_value_hi;
	u32 reserved2[6];

/*
 * Decoding helpers: CS_EXCEPTION_TYPE() extracts bits 7:0 of its argument,
 * CS_EXCEPTION_DATA() extracts bits 31:8.
 * NOTE(review): presumably meant for the 'fault'/'fatal' fields below -
 * confirm.
 */
#define CS_EXCEPTION_TYPE(x)			((x) & GENMASK(7, 0))
#define CS_EXCEPTION_DATA(x)			(((x) >> 8) & GENMASK(23, 0))
	u32 fault;
	u32 fatal;
	u64 fault_info;
	u64 fatal_info;
	u32 reserved3[10];
	u32 heap_vt_start;
	u32 heap_vt_end;
	u32 reserved4;
	u32 heap_frag_end;
	u64 heap_address;
};
126 
/*
 * Command stream group slot control interface.
 *
 * Reached through panthor_fw_csg_iface::control, which describes it as a
 * read-only interface exposing command stream group slot properties.
 */
struct panthor_fw_csg_control_iface {
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 suspend_size;
	u32 protm_suspend_size;
	u32 stream_num;
	u32 stream_stride;
};
136 
/*
 * Command stream group slot input interface.
 *
 * Reached through panthor_fw_csg_iface::input, which describes it as the
 * interface used for host updates/events.
 */
struct panthor_fw_csg_input_iface {
/*
 * Bits of the 'req' field declared below.
 *
 * CSG_STATE_MASK selects bits 2:0; CSG_STATE_TERMINATE/START/SUSPEND/RESUME
 * are values of that field.
 * CSG_REQ_MASK groups the state field with bits 4 and 5.
 * CSG_EVT_MASK groups bits 28, 29 and 31; CSG_DOORBELL (bit 30) is part of
 * neither mask.
 */
#define CSG_STATE_MASK				GENMASK(2, 0)
#define CSG_STATE_TERMINATE			0
#define CSG_STATE_START				1
#define CSG_STATE_SUSPEND			2
#define CSG_STATE_RESUME			3
#define CSG_ENDPOINT_CONFIG			BIT(4)
#define CSG_STATUS_UPDATE			BIT(5)
#define CSG_SYNC_UPDATE				BIT(28)
#define CSG_IDLE				BIT(29)
#define CSG_DOORBELL				BIT(30)
#define CSG_PROGRESS_TIMER_EVENT		BIT(31)
#define CSG_REQ_MASK				(CSG_STATE_MASK | \
						 CSG_ENDPOINT_CONFIG | \
						 CSG_STATUS_UPDATE)
#define CSG_EVT_MASK				(CSG_SYNC_UPDATE | \
						 CSG_IDLE | \
						 CSG_PROGRESS_TIMER_EVENT)
	/* Access to this field is protected by panthor_fw_csg_iface::lock. */
	u32 req;
	u32 ack_irq_mask;

	u32 doorbell_req;
	u32 cs_irq_ack;
	u32 reserved1[4];
	u64 allow_compute;
	u64 allow_fragment;
	u32 allow_other;

/*
 * Encoding helpers: CSG_EP_REQ_COMPUTE() keeps bits 7:0 of its argument,
 * CSG_EP_REQ_FRAGMENT() shifts it into bits 15:8, CSG_EP_REQ_TILER() into
 * bits 19:16 and CSG_EP_REQ_PRIORITY() into bits 31:28
 * (CSG_EP_REQ_PRIORITY_MASK).
 * NOTE(review): presumably used to build the 'endpoint_req' field below -
 * confirm.
 */
#define CSG_EP_REQ_COMPUTE(x)			((x) & GENMASK(7, 0))
#define CSG_EP_REQ_FRAGMENT(x)			(((x) << 8) & GENMASK(15, 8))
#define CSG_EP_REQ_TILER(x)			(((x) << 16) & GENMASK(19, 16))
#define CSG_EP_REQ_EXCL_COMPUTE			BIT(20)
#define CSG_EP_REQ_EXCL_FRAGMENT		BIT(21)
#define CSG_EP_REQ_PRIORITY(x)			(((x) << 28) & GENMASK(31, 28))
#define CSG_EP_REQ_PRIORITY_MASK		GENMASK(31, 28)
	u32 endpoint_req;
	u32 reserved2[2];
	u64 suspend_buf;
	u64 protm_suspend_buf;
	u32 config;
	u32 iter_trace_config;
};
179 
/*
 * Command stream group slot output interface.
 *
 * Reached through panthor_fw_csg_iface::output, which describes it as the
 * read-only interface used for FW updates/events.
 */
struct panthor_fw_csg_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 cs_irq_req;
	u32 status_endpoint_current;
	u32 status_endpoint_req;

/* NOTE(review): presumably a bit of the 'status_state' field below - confirm. */
#define CSG_STATUS_STATE_IS_IDLE		BIT(0)
	u32 status_state;
	u32 resource_dep;
};
192 
/*
 * Global control interface.
 *
 * Reached through panthor_fw_global_iface::control, which describes it as a
 * read-only interface exposing global FW properties.
 */
struct panthor_fw_global_control_iface {
	u32 version;
	u32 features;
	u32 input_va;
	u32 output_va;
	u32 group_num;
	u32 group_stride;
	u32 perfcnt_size;
	u32 instr_features;
};
203 
/*
 * Global input interface.
 *
 * Reached through panthor_fw_global_iface::input, which describes it as the
 * interface used for host updates/events.
 */
struct panthor_fw_global_input_iface {
/*
 * Bits of the 'req' field declared below.
 *
 * GLB_REQ_MASK covers bits 10:0, i.e. GLB_HALT up to GLB_IDLE_EN; GLB_SLEEP
 * (bit 12) is outside of it.
 * GLB_EVT_MASK covers bits 26:20, i.e. GLB_INACTIVE_COMPUTE up to GLB_IDLE.
 * GLB_DBG_CSF/GLB_DBG_HOST (bits 30/31) are part of neither mask.
 */
#define GLB_HALT				BIT(0)
#define GLB_CFG_PROGRESS_TIMER			BIT(1)
#define GLB_CFG_ALLOC_EN			BIT(2)
#define GLB_CFG_POWEROFF_TIMER			BIT(3)
#define GLB_PROTM_ENTER				BIT(4)
#define GLB_PERFCNT_EN				BIT(5)
#define GLB_PERFCNT_SAMPLE			BIT(6)
#define GLB_COUNTER_EN				BIT(7)
#define GLB_PING				BIT(8)
#define GLB_FWCFG_UPDATE			BIT(9)
#define GLB_IDLE_EN				BIT(10)
#define GLB_SLEEP				BIT(12)
#define GLB_INACTIVE_COMPUTE			BIT(20)
#define GLB_INACTIVE_FRAGMENT			BIT(21)
#define GLB_INACTIVE_TILER			BIT(22)
#define GLB_PROTM_EXIT				BIT(23)
#define GLB_PERFCNT_THRESHOLD			BIT(24)
#define GLB_PERFCNT_OVERFLOW			BIT(25)
#define GLB_IDLE				BIT(26)
#define GLB_DBG_CSF				BIT(30)
#define GLB_DBG_HOST				BIT(31)
#define GLB_REQ_MASK				GENMASK(10, 0)
#define GLB_EVT_MASK				GENMASK(26, 20)
	/* Access to this field is protected by panthor_fw_global_iface::lock. */
	u32 req;
	u32 ack_irq_mask;
	u32 doorbell_req;
	u32 reserved1;
	u32 progress_timer;

/*
 * Timer encoding: GLB_TIMER_VAL() keeps bits 30:0 of its argument,
 * GLB_TIMER_SOURCE_GPU_COUNTER is bit 31.
 * NOTE(review): presumably applies to 'poweroff_timer' below, and possibly to
 * the other timer fields of this structure - confirm.
 */
#define GLB_TIMER_VAL(x)			((x) & GENMASK(30, 0))
#define GLB_TIMER_SOURCE_GPU_COUNTER		BIT(31)
	u32 poweroff_timer;
	u64 core_en_mask;
	u32 reserved2;
	u32 perfcnt_as;
	u64 perfcnt_base;
	u32 perfcnt_extract;
	u32 reserved3[3];
	u32 perfcnt_config;
	u32 perfcnt_csg_select;
	u32 perfcnt_fw_enable;
	u32 perfcnt_csg_enable;
	u32 perfcnt_csf_enable;
	u32 perfcnt_shader_enable;
	u32 perfcnt_tiler_enable;
	u32 perfcnt_mmu_l2_enable;
	u32 reserved4[8];
	u32 idle_timer;
};
254 
/*
 * FW halt status codes.
 *
 * NOTE(review): presumably the values reported through
 * panthor_fw_global_output_iface::halt_status - confirm against the code
 * reading that field.
 */
enum panthor_fw_halt_status {
	PANTHOR_FW_HALT_OK = 0,
	PANTHOR_FW_HALT_ON_PANIC = 0x4e,
	PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
};
260 
/*
 * Global output interface.
 *
 * Reached through panthor_fw_global_iface::output, which describes it as the
 * read-only interface used for FW updates/events.
 */
struct panthor_fw_global_output_iface {
	u32 ack;
	u32 reserved1;
	u32 doorbell_ack;
	u32 reserved2;
	u32 halt_status;
	u32 perfcnt_status;
	u32 perfcnt_insert;
};
270 
/**
 * struct panthor_fw_cs_iface - Firmware command stream slot interface
 *
 * Groups the lock and the control/input/output sections of a command stream
 * slot. Returned by panthor_fw_get_cs_iface().
 */
struct panthor_fw_cs_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream slot control interface.
	 *
	 * Used to expose command stream slot properties.
	 *
	 * This interface is read-only.
	 *
	 * NOTE(review): unlike the control pointers of
	 * struct panthor_fw_csg_iface and struct panthor_fw_global_iface, this
	 * pointer is not const-qualified even though the interface is
	 * documented as read-only - check whether a user relies on that
	 * before constifying it.
	 */
	struct panthor_fw_cs_control_iface *control;

	/**
	 * @input: Command stream slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_cs_input_iface *input;

	/**
	 * @output: Command stream slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_cs_output_iface *output;
};
314 
/**
 * struct panthor_fw_csg_iface - Firmware command stream group slot interface
 *
 * Groups the lock and the control/input/output sections of a command stream
 * group slot. Returned by panthor_fw_get_csg_iface().
 */
struct panthor_fw_csg_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Command stream group slot control interface.
	 *
	 * Used to expose command stream group slot properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_control_iface *control;

	/**
	 * @input: Command stream group slot input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_csg_input_iface *input;

	/**
	 * @output: Command stream group slot output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_csg_output_iface *output;
};
358 
/**
 * struct panthor_fw_global_iface - Firmware global interface
 *
 * Groups the lock and the control/input/output sections of the global FW
 * interface. Returned by panthor_fw_get_glb_iface().
 */
struct panthor_fw_global_iface {
	/**
	 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
	 * field.
	 *
	 * Needed so we can update the req field concurrently from the interrupt
	 * handler and the scheduler/FW management logic.
	 *
	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
	 * interface sections are mapped uncached/write-combined right now, and
	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
	 * we have 'SHARED' GPU mappings hooked up.
	 */
	spinlock_t lock;

	/**
	 * @control: Global control interface.
	 *
	 * Used to expose global FW properties.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_control_iface *control;

	/**
	 * @input: Global input interface.
	 *
	 * Used for host updates/events.
	 */
	struct panthor_fw_global_input_iface *input;

	/**
	 * @output: Global output interface.
	 *
	 * Used for FW updates/events.
	 *
	 * This interface is read-only.
	 */
	const struct panthor_fw_global_output_iface *output;
};
402 
/**
 * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__out_reg: Name of the register to take as a reference in the output section of the
 * interface.
 * @__mask: Mask to apply to the update.
 *
 * The Host -> FW event/message passing was designed to be lockless, with each side of
 * the channel having its writeable section. Events are signaled as a difference between
 * the host and FW side in the req/ack registers (when a bit differs, there's an event
 * pending, when they are the same, nothing needs attention).
 *
 * This helper allows one to update the req register based on the current value of the
 * ack register managed by the FW. Toggling a specific bit will flag an event. In order
 * for events to be re-evaluated, the interface doorbell needs to be rung.
 *
 * For every bit set in @__mask, the new req bit is the inverse of the matching bit
 * read from @__out_reg; bits outside @__mask keep the value read from @__in_reg.
 *
 * @__iface and @__mask are each evaluated exactly once, before the interface lock is
 * taken, so expressions with side effects are safe to pass.
 *
 * Concurrent accesses to the same req register are covered: the read-modify-write
 * sequence is done with the interface lock held.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
	do { \
		__typeof__(__iface) __fw_iface = (__iface); \
		u32 __cur_val, __new_val, __out_val, __mask_val = (__mask); \
		spin_lock(&__fw_iface->lock); \
		__cur_val = READ_ONCE(__fw_iface->input->__in_reg); \
		__out_val = READ_ONCE(__fw_iface->output->__out_reg); \
		__new_val = ((__out_val ^ __mask_val) & __mask_val) | \
			    (__cur_val & ~__mask_val); \
		WRITE_ONCE(__fw_iface->input->__in_reg, __new_val); \
		spin_unlock(&__fw_iface->lock); \
	} while (0)
434 
/**
 * panthor_fw_update_reqs() - Update bits to reflect a configuration change
 * @__iface: The interface to operate on.
 * @__in_reg: Name of the register to update in the input section of the interface.
 * @__val: Value to set.
 * @__mask: Mask to apply to the update.
 *
 * Some configuration gets passed through req registers that are also used to
 * send events to the FW. Since those req registers are also updated from the
 * interrupt handler, they require special helpers to update the configuration
 * part as well.
 *
 * Bits set in @__mask are replaced by the matching bits of @__val; bits outside
 * @__mask keep the value read from @__in_reg.
 *
 * @__iface, @__val and @__mask are each evaluated exactly once, before the
 * interface lock is taken, so expressions with side effects are safe to pass.
 *
 * Concurrent accesses to the same req register are covered: the read-modify-write
 * sequence is done with the interface lock held.
 *
 * Anything requiring atomic updates to multiple registers requires a dedicated lock.
 */
#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
	do { \
		__typeof__(__iface) __fw_iface = (__iface); \
		u32 __cur_val, __new_val, __set_val = (__val), __mask_val = (__mask); \
		spin_lock(&__fw_iface->lock); \
		__cur_val = READ_ONCE(__fw_iface->input->__in_reg); \
		__new_val = (__cur_val & ~__mask_val) | (__set_val & __mask_val); \
		WRITE_ONCE(__fw_iface->input->__in_reg, __new_val); \
		spin_unlock(&__fw_iface->lock); \
	} while (0)
459 
460 struct panthor_fw_global_iface *
461 panthor_fw_get_glb_iface(struct panthor_device *ptdev);
462 
463 struct panthor_fw_csg_iface *
464 panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
465 
466 struct panthor_fw_cs_iface *
467 panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
468 
469 int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
470 			     u32 *acked, u32 timeout_ms);
471 
472 int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
473 			     u32 timeout_ms);
474 
475 void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
476 
477 struct panthor_kernel_bo *
478 panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
479 				 struct panthor_fw_ringbuf_input_iface **input,
480 				 const struct panthor_fw_ringbuf_output_iface **output,
481 				 u32 *input_fw_va, u32 *output_fw_va);
482 struct panthor_kernel_bo *
483 panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
484 
485 struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
486 
487 void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
488 int panthor_fw_post_reset(struct panthor_device *ptdev);
489 
/**
 * panthor_fw_suspend() - FW suspend helper
 * @ptdev: Device.
 *
 * Thin wrapper calling panthor_fw_pre_reset() with its on_hang argument set
 * to false.
 */
static inline void panthor_fw_suspend(struct panthor_device *ptdev)
{
	panthor_fw_pre_reset(ptdev, false);
}
494 
/**
 * panthor_fw_resume() - FW resume helper
 * @ptdev: Device.
 *
 * Thin wrapper around panthor_fw_post_reset().
 *
 * Return: The value returned by panthor_fw_post_reset().
 */
static inline int panthor_fw_resume(struct panthor_device *ptdev)
{
	return panthor_fw_post_reset(ptdev);
}
499 
500 int panthor_fw_init(struct panthor_device *ptdev);
501 void panthor_fw_unplug(struct panthor_device *ptdev);
502 
503 #endif
504