// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014-2018 Etnaviv Project
 */
5 
6 #include <drm/drm_drv.h>
7 
8 #include "etnaviv_cmdbuf.h"
9 #include "etnaviv_gpu.h"
10 #include "etnaviv_gem.h"
11 #include "etnaviv_mmu.h"
12 
13 #include "common.xml.h"
14 #include "state.xml.h"
15 #include "state_blt.xml.h"
16 #include "state_hi.xml.h"
17 #include "state_3d.xml.h"
18 #include "cmdstream.xml.h"
19 
/*
 * Command Buffer helper:
 */
23 
24 
/*
 * Store one 32-bit word at the current write position of the command
 * buffer and advance user_size by four bytes.  Hits BUG_ON() when the
 * write position has already reached (or passed) the buffer size.
 */
static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
{
	u32 *words = (u32 *)buffer->vaddr;

	BUG_ON(buffer->user_size >= buffer->size);

	/* user_size counts bytes; the buffer is written in 32-bit words */
	*(words + buffer->user_size / 4) = data;
	buffer->user_size += 4;
}
34 
/*
 * Emit a LOAD_STATE command that writes 'value' to the single state
 * register 'reg' via the command stream.  The write position is rounded
 * up to a multiple of 8 bytes first; the command takes two 32-bit words
 * (header + value).
 */
static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
	u32 reg, u32 value)
{
	u32 state_index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;
	u32 header;

	header = VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
		 VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
		 VIV_FE_LOAD_STATE_HEADER_OFFSET(state_index);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, header);
	OUT(buffer, value);
}
48 
CMD_END(struct etnaviv_cmdbuf * buffer)49 static inline void CMD_END(struct etnaviv_cmdbuf *buffer)
50 {
51 	buffer->user_size = ALIGN(buffer->user_size, 8);
52 
53 	OUT(buffer, VIV_FE_END_HEADER_OP_END);
54 }
55 
CMD_WAIT(struct etnaviv_cmdbuf * buffer,unsigned int waitcycles)56 static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer,
57 			    unsigned int waitcycles)
58 {
59 	buffer->user_size = ALIGN(buffer->user_size, 8);
60 
61 	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | waitcycles);
62 }
63 
/*
 * Emit a LINK command: a header carrying the 'prefetch' count followed by
 * the target 'address'.  The write position is rounded up to a multiple
 * of 8 bytes first.
 */
static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
	u16 prefetch, u32 address)
{
	u32 header = VIV_FE_LINK_HEADER_OP_LINK |
		     VIV_FE_LINK_HEADER_PREFETCH(prefetch);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, header);
	OUT(buffer, address);
}
73 
/*
 * Emit a STALL command: the STALL header followed by a token word that
 * encodes the 'from' and 'to' recipients.  The write position is rounded
 * up to a multiple of 8 bytes first.
 */
static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
	u32 from, u32 to)
{
	u32 token = VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to);

	buffer->user_size = ALIGN(buffer->user_size, 8);

	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
	OUT(buffer, token);
}
82 
/*
 * Load the semaphore token state register with a token that encodes the
 * 'from' and 'to' recipients (one LOAD_STATE command).
 */
static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
{
	u32 token = VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
		    VIVS_GL_SEMAPHORE_TOKEN_TO(to);

	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, token);
}
89 
/*
 * Emit the command sequence that switches the GPU to 'pipe': a cache
 * flush chosen from the currently active exec state, an FE -> PE
 * semaphore/stall pair, and finally the pipe select state load.
 *
 * Must be called with gpu->lock held.
 */
static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buffer, u8 pipe)
{
	u32 flush;

	lockdep_assert_held(&gpu->lock);

	/*
	 * The flush is selected by the pipe we are leaving: this assumes a
	 * switch to 2D always comes from 3D and vice versa.  Leaving 3D
	 * means flushing the depth and color caches, leaving 2D means
	 * flushing the 2D pixel engine cache.  Any other state flushes
	 * nothing.
	 */
	switch (gpu->exec_state) {
	case ETNA_PIPE_2D:
		flush = VIVS_GL_FLUSH_CACHE_PE2D;
		break;
	case ETNA_PIPE_3D:
		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
		break;
	default:
		flush = 0;
		break;
	}

	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);

	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
}
115 
/*
 * Debug helper: log the CPU pointer, GPU address and remaining space for
 * the region starting 'off' bytes into 'buf', then hex-dump 'len' 32-bit
 * words from that position.
 */
static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
{
	u32 *start = buf->vaddr + off;
	u32 dump_bytes = len * 4;
	u32 total = buf->size;
	u32 remaining = total - dump_bytes - off;

	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
		 start,
		 etnaviv_cmdbuf_get_va(buf,
				       &gpu->mmu_context->cmdbuf_mapping) + off,
		 remaining);

	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
		       start, dump_bytes, 0);
}
130 
131 /*
132  * Safely replace the WAIT of a waitlink with a new command and argument.
133  * The GPU may be executing this WAIT while we're modifying it, so we have
134  * to write it in a specific order to avoid the GPU branching to somewhere
135  * else.  'wl_offset' is the offset to the first byte of the WAIT command.
136  */
etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf * buffer,unsigned int wl_offset,u32 cmd,u32 arg)137 static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
138 	unsigned int wl_offset, u32 cmd, u32 arg)
139 {
140 	u32 *lw = buffer->vaddr + wl_offset;
141 
142 	lw[1] = arg;
143 	mb();
144 	lw[0] = cmd;
145 	mb();
146 }
147 
148 /*
149  * Ensure that there is space in the command buffer to contiguously write
150  * 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
151  */
etnaviv_buffer_reserve(struct etnaviv_gpu * gpu,struct etnaviv_cmdbuf * buffer,unsigned int cmd_dwords)152 static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
153 	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
154 {
155 	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
156 		buffer->user_size = 0;
157 
158 	return etnaviv_cmdbuf_get_va(buffer,
159 				     &gpu->mmu_context->cmdbuf_mapping) +
160 	       buffer->user_size;
161 }
162 
etnaviv_buffer_init(struct etnaviv_gpu * gpu)163 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
164 {
165 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
166 
167 	lockdep_assert_held(&gpu->lock);
168 
169 	/* initialize buffer */
170 	buffer->user_size = 0;
171 
172 	CMD_WAIT(buffer, gpu->fe_waitcycles);
173 	CMD_LINK(buffer, 2,
174 		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
175 		 + buffer->user_size - 4);
176 
177 	return buffer->user_size / 8;
178 }
179 
etnaviv_buffer_config_mmuv2(struct etnaviv_gpu * gpu,u32 mtlb_addr,u32 safe_addr)180 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
181 {
182 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
183 
184 	lockdep_assert_held(&gpu->lock);
185 
186 	buffer->user_size = 0;
187 
188 	if (gpu->identity.features & chipFeatures_PIPE_3D) {
189 		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
190 			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));
191 		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
192 			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
193 		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
194 		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
195 		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
196 	}
197 
198 	if (gpu->identity.features & chipFeatures_PIPE_2D) {
199 		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
200 			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));
201 		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
202 			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
203 		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
204 		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
205 		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
206 	}
207 
208 	CMD_END(buffer);
209 
210 	buffer->user_size = ALIGN(buffer->user_size, 8);
211 
212 	return buffer->user_size / 8;
213 }
214 
etnaviv_buffer_config_pta(struct etnaviv_gpu * gpu,unsigned short id)215 u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
216 {
217 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
218 
219 	lockdep_assert_held(&gpu->lock);
220 
221 	buffer->user_size = 0;
222 
223 	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
224 		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));
225 
226 	CMD_END(buffer);
227 
228 	buffer->user_size = ALIGN(buffer->user_size, 8);
229 
230 	return buffer->user_size / 8;
231 }
232 
etnaviv_buffer_end(struct etnaviv_gpu * gpu)233 void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
234 {
235 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
236 	unsigned int waitlink_offset = buffer->user_size - 16;
237 	u32 link_target, flush = 0;
238 	bool has_blt = !!(gpu->identity.minor_features5 &
239 			  chipMinorFeatures5_BLT_ENGINE);
240 
241 	lockdep_assert_held(&gpu->lock);
242 
243 	if (gpu->exec_state == ETNA_PIPE_2D)
244 		flush = VIVS_GL_FLUSH_CACHE_PE2D;
245 	else if (gpu->exec_state == ETNA_PIPE_3D)
246 		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
247 			VIVS_GL_FLUSH_CACHE_COLOR |
248 			VIVS_GL_FLUSH_CACHE_TEXTURE |
249 			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
250 			VIVS_GL_FLUSH_CACHE_SHADER_L2;
251 
252 	if (flush) {
253 		unsigned int dwords = 7;
254 
255 		if (has_blt)
256 			dwords += 10;
257 
258 		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);
259 
260 		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
261 		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
262 		if (has_blt) {
263 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
264 			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
265 			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
266 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
267 		}
268 		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
269 		if (gpu->exec_state == ETNA_PIPE_3D) {
270 			if (has_blt) {
271 				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
272 				CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
273 				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
274 			} else {
275 				CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
276 					       VIVS_TS_FLUSH_CACHE_FLUSH);
277 			}
278 		}
279 		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
280 		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
281 		if (has_blt) {
282 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
283 			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
284 			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
285 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
286 		}
287 		CMD_END(buffer);
288 
289 		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
290 					    VIV_FE_LINK_HEADER_OP_LINK |
291 					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
292 					    link_target);
293 	} else {
294 		/* Replace the last link-wait with an "END" command */
295 		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
296 					    VIV_FE_END_HEADER_OP_END, 0);
297 	}
298 }
299 
300 /* Append a 'sync point' to the ring buffer. */
etnaviv_sync_point_queue(struct etnaviv_gpu * gpu,unsigned int event)301 void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
302 {
303 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
304 	unsigned int waitlink_offset = buffer->user_size - 16;
305 	u32 dwords, target;
306 
307 	lockdep_assert_held(&gpu->lock);
308 
309 	/*
310 	 * We need at most 3 dwords in the return target:
311 	 * 1 event + 1 end + 1 wait + 1 link.
312 	 */
313 	dwords = 4;
314 	target = etnaviv_buffer_reserve(gpu, buffer, dwords);
315 
316 	/* Signal sync point event */
317 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
318 		       VIVS_GL_EVENT_FROM_PE);
319 
320 	/* Stop the FE to 'pause' the GPU */
321 	CMD_END(buffer);
322 
323 	/* Append waitlink */
324 	CMD_WAIT(buffer, gpu->fe_waitcycles);
325 	CMD_LINK(buffer, 2,
326 		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
327 		 + buffer->user_size - 4);
328 
329 	/*
330 	 * Kick off the 'sync point' command by replacing the previous
331 	 * WAIT with a link to the address in the ring buffer.
332 	 */
333 	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
334 				    VIV_FE_LINK_HEADER_OP_LINK |
335 				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
336 				    target);
337 }
338 
339 /* Append a command buffer to the ring buffer. */
etnaviv_buffer_queue(struct etnaviv_gpu * gpu,u32 exec_state,struct etnaviv_iommu_context * mmu_context,unsigned int event,struct etnaviv_cmdbuf * cmdbuf)340 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
341 	struct etnaviv_iommu_context *mmu_context, unsigned int event,
342 	struct etnaviv_cmdbuf *cmdbuf)
343 {
344 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
345 	unsigned int waitlink_offset = buffer->user_size - 16;
346 	u32 return_target, return_dwords;
347 	u32 link_target, link_dwords;
348 	bool switch_context = gpu->exec_state != exec_state;
349 	bool switch_mmu_context = gpu->mmu_context != mmu_context;
350 	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
351 	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
352 	bool has_blt = !!(gpu->identity.minor_features5 &
353 			  chipMinorFeatures5_BLT_ENGINE);
354 
355 	lockdep_assert_held(&gpu->lock);
356 
357 	if (drm_debug_enabled(DRM_UT_DRIVER))
358 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
359 
360 	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
361 					    &gpu->mmu_context->cmdbuf_mapping);
362 	link_dwords = cmdbuf->size / 8;
363 
364 	/*
365 	 * If we need maintenance prior to submitting this buffer, we will
366 	 * need to append a mmu flush load state, followed by a new
367 	 * link to this buffer - a total of four additional words.
368 	 */
369 	if (need_flush || switch_context) {
370 		u32 target, extra_dwords;
371 
372 		/* link command */
373 		extra_dwords = 1;
374 
375 		/* flush command */
376 		if (need_flush) {
377 			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
378 				extra_dwords += 1;
379 			else
380 				extra_dwords += 3;
381 		}
382 
383 		/* pipe switch commands */
384 		if (switch_context)
385 			extra_dwords += 4;
386 
387 		/* PTA load command */
388 		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
389 			extra_dwords += 1;
390 
391 		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
392 		/*
393 		 * Switch MMU context if necessary. Must be done after the
394 		 * link target has been calculated, as the jump forward in the
395 		 * kernel ring still uses the last active MMU context before
396 		 * the switch.
397 		 */
398 		if (switch_mmu_context) {
399 			struct etnaviv_iommu_context *old_context = gpu->mmu_context;
400 
401 			gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);
402 			etnaviv_iommu_context_put(old_context);
403 		}
404 
405 		if (need_flush) {
406 			/* Add the MMU flush */
407 			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
408 				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
409 					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
410 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
411 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
412 					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
413 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
414 			} else {
415 				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
416 					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
417 
418 				if (switch_mmu_context &&
419 				    gpu->sec_mode == ETNA_SEC_KERNEL) {
420 					unsigned short id =
421 						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
422 					CMD_LOAD_STATE(buffer,
423 						VIVS_MMUv2_PTA_CONFIG,
424 						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
425 				}
426 
427 				if (gpu->sec_mode == ETNA_SEC_NONE)
428 					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
429 
430 				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
431 					       flush);
432 				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
433 					SYNC_RECIPIENT_PE);
434 				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
435 					SYNC_RECIPIENT_PE);
436 			}
437 
438 			gpu->flush_seq = new_flush_seq;
439 		}
440 
441 		if (switch_context) {
442 			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
443 			gpu->exec_state = exec_state;
444 		}
445 
446 		/* And the link to the submitted buffer */
447 		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
448 					&gpu->mmu_context->cmdbuf_mapping);
449 		CMD_LINK(buffer, link_dwords, link_target);
450 
451 		/* Update the link target to point to above instructions */
452 		link_target = target;
453 		link_dwords = extra_dwords;
454 	}
455 
456 	/*
457 	 * Append a LINK to the submitted command buffer to return to
458 	 * the ring buffer.  return_target is the ring target address.
459 	 * We need at most 7 dwords in the return target: 2 cache flush +
460 	 * 2 semaphore stall + 1 event + 1 wait + 1 link.
461 	 */
462 	return_dwords = 7;
463 
464 	/*
465 	 * When the BLT engine is present we need 6 more dwords in the return
466 	 * target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
467 	 * but we don't need the normal TS flush state.
468 	 */
469 	if (has_blt)
470 		return_dwords += 6;
471 
472 	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
473 	CMD_LINK(cmdbuf, return_dwords, return_target);
474 
475 	/*
476 	 * Append a cache flush, stall, event, wait and link pointing back to
477 	 * the wait command to the ring buffer.
478 	 */
479 	if (gpu->exec_state == ETNA_PIPE_2D) {
480 		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
481 				       VIVS_GL_FLUSH_CACHE_PE2D);
482 	} else {
483 		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
484 				       VIVS_GL_FLUSH_CACHE_DEPTH |
485 				       VIVS_GL_FLUSH_CACHE_COLOR);
486 		if (has_blt) {
487 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
488 			CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
489 			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
490 		} else {
491 			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
492 					       VIVS_TS_FLUSH_CACHE_FLUSH);
493 		}
494 	}
495 	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
496 	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
497 
498 	if (has_blt) {
499 		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
500 		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
501 		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
502 		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
503 	}
504 
505 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
506 		       VIVS_GL_EVENT_FROM_PE);
507 	CMD_WAIT(buffer, gpu->fe_waitcycles);
508 	CMD_LINK(buffer, 2,
509 		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
510 		 + buffer->user_size - 4);
511 
512 	if (drm_debug_enabled(DRM_UT_DRIVER))
513 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
514 			return_target,
515 			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
516 			cmdbuf->vaddr);
517 
518 	if (drm_debug_enabled(DRM_UT_DRIVER)) {
519 		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
520 			       cmdbuf->vaddr, cmdbuf->size, 0);
521 
522 		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
523 		pr_info("addr: 0x%08x\n", link_target);
524 		pr_info("back: 0x%08x\n", return_target);
525 		pr_info("event: %d\n", event);
526 	}
527 
528 	/*
529 	 * Kick off the submitted command by replacing the previous
530 	 * WAIT with a link to the address in the ring buffer.
531 	 */
532 	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
533 				    VIV_FE_LINK_HEADER_OP_LINK |
534 				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
535 				    link_target);
536 
537 	if (drm_debug_enabled(DRM_UT_DRIVER))
538 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
539 }
540