1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55 
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59 
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65 
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68 
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75 
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82 
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89 
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96 
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104 
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128 
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134 
135 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
137 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
139 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
141 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
143 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
145 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
147 
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152 
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 	/* cp header registers */
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 	/* SE status registers */
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240 
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 	/* compute queue registers */
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281 
282 enum ta_ras_gfx_subblock {
283 	/*CPC*/
284 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 	TA_RAS_BLOCK__GFX_CPC_UCODE,
287 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 	/* CPF*/
295 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 	TA_RAS_BLOCK__GFX_CPF_TAG,
299 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 	/* CPG*/
301 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_TAG,
305 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 	/* GDS*/
307 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 	/* SPI*/
315 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 	/* SQ*/
317 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 	/* SQC (3 ranges)*/
324 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 	/* SQC range 0*/
326 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 	/* SQC range 1*/
338 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 	/* SQC range 2*/
352 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 	/* TA*/
367 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 	/* TCA*/
375 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 	/* TCC (5 sub-ranges)*/
380 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 	/* TCC range 0*/
382 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 	/* TCC range 1*/
393 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 	/* TCC range 2*/
399 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 	/* TCC range 3*/
411 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 	/* TCC range 4*/
417 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 	/* TCI*/
425 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 	/* TCP*/
427 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 	/* TD*/
437 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 	/* EA (3 sub-ranges)*/
443 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 	/* EA range 0*/
445 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 	/* EA range 1*/
456 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 	/* EA range 2*/
466 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 	/* UTC VM L2 bank*/
474 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 	/* UTC VM walker*/
476 	TA_RAS_BLOCK__UTC_VML2_WALKER,
477 	/* UTC ATC L2 2MB cache*/
478 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 	/* UTC ATC L2 4KB cache*/
480 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 	TA_RAS_BLOCK__GFX_MAX
482 };
483 
484 struct ras_gfx_subblock {
485 	unsigned char *name;
486 	int ta_subblock;
487 	int hw_supported_error_type;
488 	int sw_supported_error_type;
489 };
490 
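/*
 * Note (annotation, not upstream): AMDGPU_RAS_SUB_BLOCK() builds one
 * ras_gfx_subblocks[] entry. It stringifies the sub-block name, records the
 * matching TA_RAS_BLOCK__* index, and packs the four hw flags (a..d) and four
 * sw flags (e..h) into the hw_supported_error_type and sw_supported_error_type
 * bitmasks; these appear to be consulted when validating error-injection
 * requests (see gfx_v9_0_ras_error_inject()).
 */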
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493 		#subblock,                                                     \
494 		TA_RAS_BLOCK__##subblock,                                      \
495 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497 	}
498 
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 			     0),
518 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 			     0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 			     0, 0),
529 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 			     0),
531 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 			     0, 0),
533 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 			     0),
535 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 			     0, 0),
537 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 			     0),
539 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 			     1),
541 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 			     0, 0, 0),
543 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 			     0),
545 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 			     0),
547 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 			     0),
549 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 			     0),
551 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 			     0),
553 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 			     0, 0),
555 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 			     0),
557 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 			     0),
559 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 			     0, 0, 0),
561 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 			     0),
563 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 			     0),
565 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 			     0),
567 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 			     0),
569 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 			     0),
571 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 			     0, 0),
573 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 			     0),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 			     1),
585 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 			     1),
587 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 			     1),
589 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 			     0),
591 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 			     0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 			     0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 			     0),
609 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 			     0, 0),
611 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 			     0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648 
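/*
 * Note (annotation, not upstream): each SOC15_REG_GOLDEN_VALUE() entry names a
 * GC register plus an AND mask and OR value. soc15_program_register_sequence()
 * applies the tables at init time, typically as a read-modify-write, so only
 * the masked bits are overridden with the recommended ("golden") settings for
 * each ASIC.
 */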
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672 
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694 
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709 
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737 
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748 
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771 
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787 
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794 
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814 
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831 
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846 
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851 
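/*
 * Note (annotation, not upstream): offsets of the eight RLC SRM index control
 * address/data register pairs, expressed relative to the _0 instance so they
 * can be indexed by a loop counter when programming the RLC save/restore
 * machinery.
 */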
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863 
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875 
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880 
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 				struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 					  void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 				     void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 					      unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899 
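/*
 * Note (annotation, not upstream): KIQ packet helper. Emits a SET_RESOURCES
 * packet handing the KIQ the bitmap of compute queues it may schedule
 * (queue_mask) and the GPU address of the cleaner shader, if one is loaded.
 */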
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 				uint64_t queue_mask)
902 {
903 	struct amdgpu_device *adev = kiq_ring->adev;
904 	u64 shader_mc_addr;
905 
906 	/* Cleaner shader MC address */
907 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 	amdgpu_ring_write(kiq_ring,
911 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 		/* vmid_mask:0, queue_type:0 (KIQ) */
913 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 	amdgpu_ring_write(kiq_ring,
915 			lower_32_bits(queue_mask));	/* queue mask lo */
916 	amdgpu_ring_write(kiq_ring,
917 			upper_32_bits(queue_mask));	/* queue mask hi */
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
921 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
922 }
923 
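/*
 * Note (annotation, not upstream): emit a MAP_QUEUES packet so the KIQ maps
 * one ring's hardware queue. The me/pipe/queue selection, doorbell offset,
 * MQD address and wptr polling address are taken from the target ring;
 * eng_sel 4 selects the gfx engine, 0 the compute engine.
 */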
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 				 struct amdgpu_ring *ring)
926 {
927 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 	uint64_t wptr_addr = ring->wptr_gpu_addr;
929 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930 
931 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 			 /*queue_type: normal compute queue */
940 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 			 /* alloc format: all_on_one_pipe */
942 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 			 /* num_queues: must be 1 */
945 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 	amdgpu_ring_write(kiq_ring,
947 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953 
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 				   struct amdgpu_ring *ring,
956 				   enum amdgpu_unmap_queues_action action,
957 				   u64 gpu_addr, u64 seq)
958 {
959 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960 
961 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
964 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 	amdgpu_ring_write(kiq_ring,
968 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969 
970 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 		amdgpu_ring_write(kiq_ring, 0);
973 		amdgpu_ring_write(kiq_ring, 0);
974 
975 	} else {
976 		amdgpu_ring_write(kiq_ring, 0);
977 		amdgpu_ring_write(kiq_ring, 0);
978 		amdgpu_ring_write(kiq_ring, 0);
979 	}
980 }
981 
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 				   struct amdgpu_ring *ring,
984 				   u64 addr,
985 				   u64 seq)
986 {
987 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988 
989 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 	amdgpu_ring_write(kiq_ring,
991 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 			  PACKET3_QUERY_STATUS_COMMAND(2));
994 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 	amdgpu_ring_write(kiq_ring,
996 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003 
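/*
 * Note (annotation, not upstream): emit an INVALIDATE_TLBS packet, i.e. a
 * PASID-based TLB flush of the given flush type, optionally on all VM hubs.
 */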
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 				uint16_t pasid, uint32_t flush_type,
1006 				bool all_hub)
1007 {
1008 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 	amdgpu_ring_write(kiq_ring,
1010 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015 
1016 
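/*
 * Note (annotation, not upstream): reset a hung hardware queue directly via
 * register writes rather than a KIQ packet. The queue is selected through
 * SRBM while RLC safe mode is held, a dequeue request plus SPI queue reset is
 * issued, then CP_HQD_ACTIVE is polled until the HQD goes idle. Only compute
 * queues are handled here.
 */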
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 					uint32_t xcc_id, uint32_t vmid)
1020 {
1021 	struct amdgpu_device *adev = kiq_ring->adev;
1022 	unsigned i;
1023 
1024 	/* enter safe mode */
1025 	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 	mutex_lock(&adev->srbm_mutex);
1027 	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028 
1029 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 		/* wait until the dequeue takes effect */
1033 		for (i = 0; i < adev->usec_timeout; i++) {
1034 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 				break;
1036 			udelay(1);
1037 		}
1038 		if (i >= adev->usec_timeout)
1039 			dev_err(adev->dev, "failed to wait for hqd to deactivate\n");
1040 	} else {
1041 		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 	}
1043 
1044 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 	mutex_unlock(&adev->srbm_mutex);
1046 	/* exit safe mode */
1047 	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049 
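/*
 * Note (annotation, not upstream): KIQ PM4 function table for gfx v9. The
 * *_size fields give each packet's length in dwords (header included),
 * presumably so callers can reserve the right amount of ring space before
 * emitting it.
 */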
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1055 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 	.set_resources_size = 8,
1058 	.map_queues_size = 7,
1059 	.unmap_queues_size = 6,
1060 	.query_status_size = 7,
1061 	.invalidate_tlbs_size = 2,
1062 };
1063 
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068 
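/*
 * Note (annotation, not upstream): program the per-ASIC golden register
 * tables, keyed off the GC IP version. Most parts also get the common
 * gc_9_x settings; Renoir returns early and the 9.4.1/9.4.2 parts are
 * excluded from the common sequence.
 */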
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 	case IP_VERSION(9, 0, 1):
1073 		soc15_program_register_sequence(adev,
1074 						golden_settings_gc_9_0,
1075 						ARRAY_SIZE(golden_settings_gc_9_0));
1076 		soc15_program_register_sequence(adev,
1077 						golden_settings_gc_9_0_vg10,
1078 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 		break;
1080 	case IP_VERSION(9, 2, 1):
1081 		soc15_program_register_sequence(adev,
1082 						golden_settings_gc_9_2_1,
1083 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 		soc15_program_register_sequence(adev,
1085 						golden_settings_gc_9_2_1_vg12,
1086 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 		break;
1088 	case IP_VERSION(9, 4, 0):
1089 		soc15_program_register_sequence(adev,
1090 						golden_settings_gc_9_0,
1091 						ARRAY_SIZE(golden_settings_gc_9_0));
1092 		soc15_program_register_sequence(adev,
1093 						golden_settings_gc_9_0_vg20,
1094 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 		break;
1096 	case IP_VERSION(9, 4, 1):
1097 		soc15_program_register_sequence(adev,
1098 						golden_settings_gc_9_4_1_arct,
1099 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 		break;
1101 	case IP_VERSION(9, 2, 2):
1102 	case IP_VERSION(9, 1, 0):
1103 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 						ARRAY_SIZE(golden_settings_gc_9_1));
1105 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 			soc15_program_register_sequence(adev,
1107 							golden_settings_gc_9_1_rv2,
1108 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 		else
1110 			soc15_program_register_sequence(adev,
1111 							golden_settings_gc_9_1_rv1,
1112 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 		break;
1114 	case IP_VERSION(9, 3, 0):
1115 		soc15_program_register_sequence(adev,
1116 						golden_settings_gc_9_1_rn,
1117 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 		return; /* for renoir, don't need common goldensetting */
1119 	case IP_VERSION(9, 4, 2):
1120 		gfx_v9_4_2_init_golden_registers(adev,
1121 						 adev->smuio.funcs->get_die_id(adev));
1122 		break;
1123 	default:
1124 		break;
1125 	}
1126 
1127 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132 
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 				       bool wc, uint32_t reg, uint32_t val)
1135 {
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 				WRITE_DATA_DST_SEL(0) |
1139 				(wc ? WR_CONFIRM : 0));
1140 	amdgpu_ring_write(ring, reg);
1141 	amdgpu_ring_write(ring, 0);
1142 	amdgpu_ring_write(ring, val);
1143 }
1144 
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 				  int mem_space, int opt, uint32_t addr0,
1147 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1148 				  uint32_t inv)
1149 {
1150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 	amdgpu_ring_write(ring,
1152 				 /* memory (1) or register (0) */
1153 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1157 
1158 	if (mem_space)
1159 		BUG_ON(addr0 & 0x3); /* Dword align */
1160 	amdgpu_ring_write(ring, addr0);
1161 	amdgpu_ring_write(ring, addr1);
1162 	amdgpu_ring_write(ring, ref);
1163 	amdgpu_ring_write(ring, mask);
1164 	amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166 
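/*
 * Basic ring liveness test: write 0xDEADBEEF to SCRATCH_REG0 through a
 * SET_UCONFIG_REG packet on the ring and poll the register until the value
 * shows up or adev->usec_timeout expires.  Returns 0 on success, a negative
 * error code otherwise.
 */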
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 	struct amdgpu_device *adev = ring->adev;
1170 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 	uint32_t tmp = 0;
1172 	unsigned i;
1173 	int r;
1174 
1175 	WREG32(scratch, 0xCAFEDEAD);
1176 	r = amdgpu_ring_alloc(ring, 3);
1177 	if (r)
1178 		return r;
1179 
1180 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 	amdgpu_ring_write(ring, 0xDEADBEEF);
1183 	amdgpu_ring_commit(ring);
1184 
1185 	for (i = 0; i < adev->usec_timeout; i++) {
1186 		tmp = RREG32(scratch);
1187 		if (tmp == 0xDEADBEEF)
1188 			break;
1189 		udelay(1);
1190 	}
1191 
1192 	if (i >= adev->usec_timeout)
1193 		r = -ETIMEDOUT;
1194 	return r;
1195 }
1196 
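/*
 * IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, wait for its fence, and then verify the value actually
 * landed in memory.
 */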
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 	struct amdgpu_device *adev = ring->adev;
1200 	struct amdgpu_ib ib;
1201 	struct dma_fence *f = NULL;
1202 
1203 	unsigned index;
1204 	uint64_t gpu_addr;
1205 	uint32_t tmp;
1206 	long r;
1207 
1208 	r = amdgpu_device_wb_get(adev, &index);
1209 	if (r)
1210 		return r;
1211 
1212 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 	memset(&ib, 0, sizeof(ib));
1215 
1216 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 	if (r)
1218 		goto err1;
1219 
1220 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 	ib.ptr[2] = lower_32_bits(gpu_addr);
1223 	ib.ptr[3] = upper_32_bits(gpu_addr);
1224 	ib.ptr[4] = 0xDEADBEEF;
1225 	ib.length_dw = 5;
1226 
1227 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 	if (r)
1229 		goto err2;
1230 
1231 	r = dma_fence_wait_timeout(f, false, timeout);
1232 	if (r == 0) {
1233 		r = -ETIMEDOUT;
1234 		goto err2;
1235 	} else if (r < 0) {
1236 		goto err2;
1237 	}
1238 
1239 	tmp = adev->wb.wb[index];
1240 	if (tmp == 0xDEADBEEF)
1241 		r = 0;
1242 	else
1243 		r = -EINVAL;
1244 
1245 err2:
1246 	amdgpu_ib_free(adev, &ib, NULL);
1247 	dma_fence_put(f);
1248 err1:
1249 	amdgpu_device_wb_free(adev, index);
1250 	return r;
1251 }
1252 
1253 
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 	amdgpu_ucode_release(&adev->gfx.me_fw);
1258 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262 
1263 	kfree(adev->gfx.rlc.register_list_format);
1264 }
1265 
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 	adev->gfx.me_fw_write_wait = false;
1269 	adev->gfx.mec_fw_write_wait = false;
1270 
1271 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273 	     (adev->gfx.mec_feature_version < 46) ||
1274 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275 	     (adev->gfx.pfp_feature_version < 46)))
1276 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1277 
1278 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279 	case IP_VERSION(9, 0, 1):
1280 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281 		    (adev->gfx.me_feature_version >= 42) &&
1282 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283 		    (adev->gfx.pfp_feature_version >= 42))
1284 			adev->gfx.me_fw_write_wait = true;
1285 
1286 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287 		    (adev->gfx.mec_feature_version >= 42))
1288 			adev->gfx.mec_fw_write_wait = true;
1289 		break;
1290 	case IP_VERSION(9, 2, 1):
1291 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292 		    (adev->gfx.me_feature_version >= 44) &&
1293 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294 		    (adev->gfx.pfp_feature_version >= 44))
1295 			adev->gfx.me_fw_write_wait = true;
1296 
1297 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298 		    (adev->gfx.mec_feature_version >= 44))
1299 			adev->gfx.mec_fw_write_wait = true;
1300 		break;
1301 	case IP_VERSION(9, 4, 0):
1302 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303 		    (adev->gfx.me_feature_version >= 44) &&
1304 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305 		    (adev->gfx.pfp_feature_version >= 44))
1306 			adev->gfx.me_fw_write_wait = true;
1307 
1308 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309 		    (adev->gfx.mec_feature_version >= 44))
1310 			adev->gfx.mec_fw_write_wait = true;
1311 		break;
1312 	case IP_VERSION(9, 1, 0):
1313 	case IP_VERSION(9, 2, 2):
1314 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315 		    (adev->gfx.me_feature_version >= 42) &&
1316 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317 		    (adev->gfx.pfp_feature_version >= 42))
1318 			adev->gfx.me_fw_write_wait = true;
1319 
1320 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321 		    (adev->gfx.mec_feature_version >= 42))
1322 			adev->gfx.mec_fw_write_wait = true;
1323 		break;
1324 	default:
1325 		adev->gfx.me_fw_write_wait = true;
1326 		adev->gfx.mec_fw_write_wait = true;
1327 		break;
1328 	}
1329 }
1330 
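/*
 * Boards on which GFXOFF is known to be broken, matched by PCI vendor,
 * device, subsystem IDs and revision.  The list is terminated by an
 * all-zero entry.
 */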
1331 struct amdgpu_gfxoff_quirk {
1332 	u16 chip_vendor;
1333 	u16 chip_device;
1334 	u16 subsys_vendor;
1335 	u16 subsys_device;
1336 	u8 revision;
1337 };
1338 
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348 	/* https://bbs.openkylin.top/t/topic/171497 */
1349 	{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350 	/* HP 705G4 DM with R5 2400G */
1351 	{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352 	{ 0, 0, 0, 0, 0 },
1353 };
1354 
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358 
1359 	while (p && p->chip_device != 0) {
1360 		if (pdev->vendor == p->chip_vendor &&
1361 		    pdev->device == p->chip_device &&
1362 		    pdev->subsystem_vendor == p->subsys_vendor &&
1363 		    pdev->subsystem_device == p->subsys_device &&
1364 		    pdev->revision == p->revision) {
1365 			return true;
1366 		}
1367 		++p;
1368 	}
1369 	return false;
1370 }
1371 
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374 	if (adev->pm.fw_version >= 0x41e2b)
1375 		return true;
1376 	else
1377 		return false;
1378 }
1379 
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1384 	    (adev->gfx.me_feature_version >= 52))
1385 		return true;
1386 	else
1387 		return false;
1388 }
1389 
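/*
 * Disable GFXOFF on quirked boards, and on Raven parts whose RLC firmware
 * is too old to support it.  Where GFXOFF stays enabled on an APU, also
 * advertise the GFX powergating flags that go along with it.
 */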
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394 
1395 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396 	case IP_VERSION(9, 0, 1):
1397 	case IP_VERSION(9, 2, 1):
1398 	case IP_VERSION(9, 4, 0):
1399 		break;
1400 	case IP_VERSION(9, 2, 2):
1401 	case IP_VERSION(9, 1, 0):
1402 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404 		    ((!is_raven_kicker(adev) &&
1405 		      adev->gfx.rlc_fw_version < 531) ||
1406 		     (adev->gfx.rlc_feature_version < 1) ||
1407 		     !adev->gfx.rlc.is_rlc_v2_1))
1408 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409 
1410 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412 				AMD_PG_SUPPORT_CP |
1413 				AMD_PG_SUPPORT_RLC_SMU_HS;
1414 		break;
1415 	case IP_VERSION(9, 3, 0):
1416 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418 				AMD_PG_SUPPORT_CP |
1419 				AMD_PG_SUPPORT_RLC_SMU_HS;
1420 		break;
1421 	default:
1422 		break;
1423 	}
1424 }
1425 
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427 					  char *chip_name)
1428 {
1429 	int err;
1430 
1431 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432 				   "amdgpu/%s_pfp.bin", chip_name);
1433 	if (err)
1434 		goto out;
1435 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1436 
1437 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1438 				   "amdgpu/%s_me.bin", chip_name);
1439 	if (err)
1440 		goto out;
1441 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1442 
1443 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1444 				   "amdgpu/%s_ce.bin", chip_name);
1445 	if (err)
1446 		goto out;
1447 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1448 
1449 out:
1450 	if (err) {
1451 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1452 		amdgpu_ucode_release(&adev->gfx.me_fw);
1453 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1454 	}
1455 	return err;
1456 }
1457 
1458 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1459 				       char *chip_name)
1460 {
1461 	int err;
1462 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1463 	uint16_t version_major;
1464 	uint16_t version_minor;
1465 	uint32_t smu_version;
1466 
1467 	/*
1468 	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1469 	 * instead of picasso_rlc.bin.
1470 	 * Judgment method:
1471 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1472 	 *          or revision >= 0xD8 && revision <= 0xDF
1473 	 * otherwise it is PCO FP5
1474 	 */
1475 	if (!strcmp(chip_name, "picasso") &&
1476 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1477 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1478 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1479 					   "amdgpu/%s_rlc_am4.bin", chip_name);
1480 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1481 		(smu_version >= 0x41e2b))
1482 		/*
1483 		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1484 		 */
1485 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1486 					   "amdgpu/%s_kicker_rlc.bin", chip_name);
1487 	else
1488 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1489 					   "amdgpu/%s_rlc.bin", chip_name);
1490 	if (err)
1491 		goto out;
1492 
1493 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1494 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1495 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1496 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1497 out:
1498 	if (err)
1499 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1500 
1501 	return err;
1502 }
1503 
1504 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1505 {
1506 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1507 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1508 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1509 		return false;
1510 
1511 	return true;
1512 }
1513 
1514 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1515 					      char *chip_name)
1516 {
1517 	int err;
1518 
1519 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1520 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1521 					   "amdgpu/%s_sjt_mec.bin", chip_name);
1522 	else
1523 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1524 					   "amdgpu/%s_mec.bin", chip_name);
1525 	if (err)
1526 		goto out;
1527 
1528 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1529 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1530 
1531 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1532 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1533 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1534 						   "amdgpu/%s_sjt_mec2.bin", chip_name);
1535 		else
1536 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1537 						   "amdgpu/%s_mec2.bin", chip_name);
1538 		if (!err) {
1539 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1540 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1541 		} else {
1542 			err = 0;
1543 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1544 		}
1545 	} else {
1546 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1547 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1548 	}
1549 
1550 	gfx_v9_0_check_if_need_gfxoff(adev);
1551 	gfx_v9_0_check_fw_write_wait(adev);
1552 
1553 out:
1554 	if (err)
1555 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1556 	return err;
1557 }
1558 
1559 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1560 {
1561 	char ucode_prefix[30];
1562 	int r;
1563 
1564 	DRM_DEBUG("\n");
1565 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1566 
1567 	/* No CPG in Arcturus */
1568 	if (adev->gfx.num_gfx_rings) {
1569 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1570 		if (r)
1571 			return r;
1572 	}
1573 
1574 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1575 	if (r)
1576 		return r;
1577 
1578 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1579 	if (r)
1580 		return r;
1581 
1582 	return r;
1583 }
1584 
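/*
 * Compute the clear-state buffer size in dwords: PREAMBLE begin/end,
 * CONTEXT_CONTROL, the SET_CONTEXT_REG extents from gfx9_cs_data, and the
 * final CLEAR_STATE packet.
 */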
1585 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1586 {
1587 	u32 count = 0;
1588 	const struct cs_section_def *sect = NULL;
1589 	const struct cs_extent_def *ext = NULL;
1590 
1591 	/* begin clear state */
1592 	count += 2;
1593 	/* context control state */
1594 	count += 3;
1595 
1596 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1597 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1598 			if (sect->id == SECT_CONTEXT)
1599 				count += 2 + ext->reg_count;
1600 			else
1601 				return 0;
1602 		}
1603 	}
1604 
1605 	/* end clear state */
1606 	count += 2;
1607 	/* clear state */
1608 	count += 2;
1609 
1610 	return count;
1611 }
1612 
1613 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1614 				    volatile u32 *buffer)
1615 {
1616 	u32 count = 0, i;
1617 	const struct cs_section_def *sect = NULL;
1618 	const struct cs_extent_def *ext = NULL;
1619 
1620 	if (adev->gfx.rlc.cs_data == NULL)
1621 		return;
1622 	if (buffer == NULL)
1623 		return;
1624 
1625 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1626 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1627 
1628 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1629 	buffer[count++] = cpu_to_le32(0x80000000);
1630 	buffer[count++] = cpu_to_le32(0x80000000);
1631 
1632 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1633 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1634 			if (sect->id == SECT_CONTEXT) {
1635 				buffer[count++] =
1636 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1637 				buffer[count++] = cpu_to_le32(ext->reg_index -
1638 						PACKET3_SET_CONTEXT_REG_START);
1639 				for (i = 0; i < ext->reg_count; i++)
1640 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1641 			} else {
1642 				return;
1643 			}
1644 		}
1645 	}
1646 
1647 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1648 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1649 
1650 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1651 	buffer[count++] = cpu_to_le32(0);
1652 }
1653 
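/*
 * Program the per-SE/SH masks of CUs that stay powered while the rest can
 * be power-gated: the first two CUs found in each SH go into
 * RLC_PG_ALWAYS_ON_CU_MASK, and the first 4/8/12 (APU/Vega12/other) into
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 */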
1654 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1655 {
1656 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1657 	uint32_t pg_always_on_cu_num = 2;
1658 	uint32_t always_on_cu_num;
1659 	uint32_t i, j, k;
1660 	uint32_t mask, cu_bitmap, counter;
1661 
1662 	if (adev->flags & AMD_IS_APU)
1663 		always_on_cu_num = 4;
1664 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1665 		always_on_cu_num = 8;
1666 	else
1667 		always_on_cu_num = 12;
1668 
1669 	mutex_lock(&adev->grbm_idx_mutex);
1670 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1671 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1672 			mask = 1;
1673 			cu_bitmap = 0;
1674 			counter = 0;
1675 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1676 
1677 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1678 				if (cu_info->bitmap[0][i][j] & mask) {
1679 					if (counter == pg_always_on_cu_num)
1680 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1681 					if (counter < always_on_cu_num)
1682 						cu_bitmap |= mask;
1683 					else
1684 						break;
1685 					counter++;
1686 				}
1687 				mask <<= 1;
1688 			}
1689 
1690 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1691 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1692 		}
1693 	}
1694 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1695 	mutex_unlock(&adev->grbm_idx_mutex);
1696 }
1697 
1698 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1699 {
1700 	uint32_t data;
1701 
1702 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1703 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1704 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1706 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1707 
1708 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1709 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1710 
1711 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1712 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1713 
1714 	mutex_lock(&adev->grbm_idx_mutex);
1715 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1716 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1717 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1718 
1719 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1720 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1721 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1722 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1723 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1724 
1725 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1726 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1727 	data &= 0x0000FFFF;
1728 	data |= 0x00C00000;
1729 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1730 
1731 	/*
1732 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1733 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1734 	 */
1735 
1736 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1737 	 * but used for RLC_LB_CNTL configuration */
1738 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1739 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1740 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1741 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1742 	mutex_unlock(&adev->grbm_idx_mutex);
1743 
1744 	gfx_v9_0_init_always_on_cu_mask(adev);
1745 }
1746 
1747 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1748 {
1749 	uint32_t data;
1750 
1751 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1752 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1753 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1754 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1755 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1756 
1757 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1758 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1759 
1760 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1761 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1762 
1763 	mutex_lock(&adev->grbm_idx_mutex);
1764 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1765 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1766 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1767 
1768 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1769 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1770 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1771 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1772 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1773 
1774 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1775 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1776 	data &= 0x0000FFFF;
1777 	data |= 0x00C00000;
1778 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1779 
1780 	/*
1781 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1782 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1783 	 */
1784 
1785 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
1786 	 * but used for RLC_LB_CNTL configuration */
1787 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1788 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1789 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1790 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1791 	mutex_unlock(&adev->grbm_idx_mutex);
1792 
1793 	gfx_v9_0_init_always_on_cu_mask(adev);
1794 }
1795 
1796 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1797 {
1798 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1799 }
1800 
1801 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1802 {
1803 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1804 		return 5;
1805 	else
1806 		return 4;
1807 }
1808 
1809 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1810 {
1811 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1812 
1813 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1814 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1815 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1816 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1817 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1818 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1819 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1820 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1821 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1822 }
1823 
1824 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1825 {
1826 	const struct cs_section_def *cs_data;
1827 	int r;
1828 
1829 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1830 
1831 	cs_data = adev->gfx.rlc.cs_data;
1832 
1833 	if (cs_data) {
1834 		/* init clear state block */
1835 		r = amdgpu_gfx_rlc_init_csb(adev);
1836 		if (r)
1837 			return r;
1838 	}
1839 
1840 	if (adev->flags & AMD_IS_APU) {
1841 		/* TODO: double check the cp_table_size for RV */
1842 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1843 		r = amdgpu_gfx_rlc_init_cpt(adev);
1844 		if (r)
1845 			return r;
1846 	}
1847 
1848 	return 0;
1849 }
1850 
1851 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1852 {
1853 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1854 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1855 }
1856 
1857 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1858 {
1859 	int r;
1860 	u32 *hpd;
1861 	const __le32 *fw_data;
1862 	unsigned fw_size;
1863 	u32 *fw;
1864 	size_t mec_hpd_size;
1865 
1866 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1867 
1868 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1869 
1870 	/* take ownership of the relevant compute queues */
1871 	amdgpu_gfx_compute_queue_acquire(adev);
1872 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1873 	if (mec_hpd_size) {
1874 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1875 					      AMDGPU_GEM_DOMAIN_VRAM |
1876 					      AMDGPU_GEM_DOMAIN_GTT,
1877 					      &adev->gfx.mec.hpd_eop_obj,
1878 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1879 					      (void **)&hpd);
1880 		if (r) {
1881 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1882 			gfx_v9_0_mec_fini(adev);
1883 			return r;
1884 		}
1885 
1886 		memset(hpd, 0, mec_hpd_size);
1887 
1888 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890 	}
1891 
1892 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1893 
1894 	fw_data = (const __le32 *)
1895 		(adev->gfx.mec_fw->data +
1896 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1897 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1898 
1899 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1900 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1901 				      &adev->gfx.mec.mec_fw_obj,
1902 				      &adev->gfx.mec.mec_fw_gpu_addr,
1903 				      (void **)&fw);
1904 	if (r) {
1905 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1906 		gfx_v9_0_mec_fini(adev);
1907 		return r;
1908 	}
1909 
1910 	memcpy(fw, fw_data, fw_size);
1911 
1912 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1913 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1914 
1915 	return 0;
1916 }
1917 
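/*
 * Read one wave register through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register pair, addressing the given SIMD/wave with FORCE_READ set.
 */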
1918 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1919 {
1920 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1921 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1922 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1923 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1924 		(SQ_IND_INDEX__FORCE_READ_MASK));
1925 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1926 }
1927 
1928 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1929 			   uint32_t wave, uint32_t thread,
1930 			   uint32_t regno, uint32_t num, uint32_t *out)
1931 {
1932 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1933 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1934 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1935 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1936 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1937 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1938 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1939 	while (num--)
1940 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1941 }
1942 
1943 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1944 {
1945 	/* type 1 wave data */
1946 	dst[(*no_fields)++] = 1;
1947 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1948 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1949 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1950 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1951 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1952 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1953 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1954 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1955 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1956 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1957 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1958 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1959 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1960 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1961 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1962 }
1963 
1964 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1965 				     uint32_t wave, uint32_t start,
1966 				     uint32_t size, uint32_t *dst)
1967 {
1968 	wave_read_regs(
1969 		adev, simd, wave, 0,
1970 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1971 }
1972 
1973 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1974 				     uint32_t wave, uint32_t thread,
1975 				     uint32_t start, uint32_t size,
1976 				     uint32_t *dst)
1977 {
1978 	wave_read_regs(
1979 		adev, simd, wave, thread,
1980 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1981 }
1982 
1983 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1984 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1985 {
1986 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1987 }
1988 
1989 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1990 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1991 	.select_se_sh = &gfx_v9_0_select_se_sh,
1992 	.read_wave_data = &gfx_v9_0_read_wave_data,
1993 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1994 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1995 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1996 };
1997 
1998 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1999 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2000 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003 
2004 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2005 	.ras_block = {
2006 		.hw_ops = &gfx_v9_0_ras_ops,
2007 	},
2008 };
2009 
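/*
 * Fill in the per-ASIC gfx.config defaults (FIFO sizes, GB_ADDR_CONFIG,
 * RAS block) and derive the gb_addr_config_fields helpers from the
 * GB_ADDR_CONFIG value.
 */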
2010 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2011 {
2012 	u32 gb_addr_config;
2013 	int err;
2014 
2015 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2016 	case IP_VERSION(9, 0, 1):
2017 		adev->gfx.config.max_hw_contexts = 8;
2018 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2019 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2020 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2021 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2022 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2023 		break;
2024 	case IP_VERSION(9, 2, 1):
2025 		adev->gfx.config.max_hw_contexts = 8;
2026 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2031 		DRM_INFO("fix gfx.config for vega12\n");
2032 		break;
2033 	case IP_VERSION(9, 4, 0):
2034 		adev->gfx.ras = &gfx_v9_0_ras;
2035 		adev->gfx.config.max_hw_contexts = 8;
2036 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041 		gb_addr_config &= ~0xf3e777ff;
2042 		gb_addr_config |= 0x22014042;
2043 		/* check vbios table if gpu info is not available */
2044 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2045 		if (err)
2046 			return err;
2047 		break;
2048 	case IP_VERSION(9, 2, 2):
2049 	case IP_VERSION(9, 1, 0):
2050 		adev->gfx.config.max_hw_contexts = 8;
2051 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2052 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2053 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2054 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2055 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2056 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2057 		else
2058 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2059 		break;
2060 	case IP_VERSION(9, 4, 1):
2061 		adev->gfx.ras = &gfx_v9_4_ras;
2062 		adev->gfx.config.max_hw_contexts = 8;
2063 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2064 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2065 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2066 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2067 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2068 		gb_addr_config &= ~0xf3e777ff;
2069 		gb_addr_config |= 0x22014042;
2070 		break;
2071 	case IP_VERSION(9, 3, 0):
2072 		adev->gfx.config.max_hw_contexts = 8;
2073 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2076 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078 		gb_addr_config &= ~0xf3e777ff;
2079 		gb_addr_config |= 0x22010042;
2080 		break;
2081 	case IP_VERSION(9, 4, 2):
2082 		adev->gfx.ras = &gfx_v9_4_2_ras;
2083 		adev->gfx.config.max_hw_contexts = 8;
2084 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2085 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2086 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2087 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2088 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2089 		gb_addr_config &= ~0xf3e777ff;
2090 		gb_addr_config |= 0x22014042;
2091 		/* check vbios table if gpu info is not available */
2092 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2093 		if (err)
2094 			return err;
2095 		break;
2096 	default:
2097 		BUG();
2098 		break;
2099 	}
2100 
2101 	adev->gfx.config.gb_addr_config = gb_addr_config;
2102 
2103 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2104 			REG_GET_FIELD(
2105 					adev->gfx.config.gb_addr_config,
2106 					GB_ADDR_CONFIG,
2107 					NUM_PIPES);
2108 
2109 	adev->gfx.config.max_tile_pipes =
2110 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2111 
2112 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2113 			REG_GET_FIELD(
2114 					adev->gfx.config.gb_addr_config,
2115 					GB_ADDR_CONFIG,
2116 					NUM_BANKS);
2117 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2118 			REG_GET_FIELD(
2119 					adev->gfx.config.gb_addr_config,
2120 					GB_ADDR_CONFIG,
2121 					MAX_COMPRESSED_FRAGS);
2122 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2123 			REG_GET_FIELD(
2124 					adev->gfx.config.gb_addr_config,
2125 					GB_ADDR_CONFIG,
2126 					NUM_RB_PER_SE);
2127 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2128 			REG_GET_FIELD(
2129 					adev->gfx.config.gb_addr_config,
2130 					GB_ADDR_CONFIG,
2131 					NUM_SHADER_ENGINES);
2132 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2133 			REG_GET_FIELD(
2134 					adev->gfx.config.gb_addr_config,
2135 					GB_ADDR_CONFIG,
2136 					PIPE_INTERLEAVE_SIZE));
2137 
2138 	return 0;
2139 }
2140 
2141 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2142 				      int mec, int pipe, int queue)
2143 {
2144 	unsigned int irq_type;
2145 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2146 	unsigned int hw_prio;
2149 
2150 	/* mec0 is me1 */
2151 	ring->me = mec + 1;
2152 	ring->pipe = pipe;
2153 	ring->queue = queue;
2154 
2155 	ring->ring_obj = NULL;
2156 	ring->use_doorbell = true;
2157 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2158 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2159 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2160 	ring->vm_hub = AMDGPU_GFXHUB(0);
2161 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162 
2163 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165 		+ ring->pipe;
2166 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2167 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2168 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2169 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2170 				hw_prio, NULL);
2171 }
2172 
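/*
 * Allocate the buffers used by the GFX IP dump: one array for the core
 * register list and one large enough to hold the compute-queue registers
 * of every MEC/pipe/queue instance.  If an allocation fails, the
 * corresponding pointer is left NULL.
 */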
2173 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2174 {
2175 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2176 	uint32_t *ptr;
2177 	uint32_t inst;
2178 
2179 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2180 	if (!ptr) {
2181 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2182 		adev->gfx.ip_dump_core = NULL;
2183 	} else {
2184 		adev->gfx.ip_dump_core = ptr;
2185 	}
2186 
2187 	/* Allocate memory for compute queue registers for all the instances */
2188 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2189 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2190 		adev->gfx.mec.num_queue_per_pipe;
2191 
2192 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2193 	if (!ptr) {
2194 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2195 		adev->gfx.ip_dump_compute_queues = NULL;
2196 	} else {
2197 		adev->gfx.ip_dump_compute_queues = ptr;
2198 	}
2199 }
2200 
2201 static int gfx_v9_0_sw_init(void *handle)
2202 {
2203 	int i, j, k, r, ring_id;
2204 	int xcc_id = 0;
2205 	struct amdgpu_ring *ring;
2206 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2207 	unsigned int hw_prio;
2208 
2209 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2210 	case IP_VERSION(9, 0, 1):
2211 	case IP_VERSION(9, 2, 1):
2212 	case IP_VERSION(9, 4, 0):
2213 	case IP_VERSION(9, 2, 2):
2214 	case IP_VERSION(9, 1, 0):
2215 	case IP_VERSION(9, 4, 1):
2216 	case IP_VERSION(9, 3, 0):
2217 	case IP_VERSION(9, 4, 2):
2218 		adev->gfx.mec.num_mec = 2;
2219 		break;
2220 	default:
2221 		adev->gfx.mec.num_mec = 1;
2222 		break;
2223 	}
2224 
2225 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2226 	default:
2227 		adev->gfx.enable_cleaner_shader = false;
2228 		break;
2229 	}
2230 
2231 	adev->gfx.mec.num_pipe_per_mec = 4;
2232 	adev->gfx.mec.num_queue_per_pipe = 8;
2233 
2234 	/* EOP Event */
2235 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2236 	if (r)
2237 		return r;
2238 
2239 	/* Bad opcode Event */
2240 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2241 			      GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2242 			      &adev->gfx.bad_op_irq);
2243 	if (r)
2244 		return r;
2245 
2246 	/* Privileged reg */
2247 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2248 			      &adev->gfx.priv_reg_irq);
2249 	if (r)
2250 		return r;
2251 
2252 	/* Privileged inst */
2253 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2254 			      &adev->gfx.priv_inst_irq);
2255 	if (r)
2256 		return r;
2257 
2258 	/* ECC error */
2259 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2260 			      &adev->gfx.cp_ecc_error_irq);
2261 	if (r)
2262 		return r;
2263 
2264 	/* FUE error */
2265 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2266 			      &adev->gfx.cp_ecc_error_irq);
2267 	if (r)
2268 		return r;
2269 
2270 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2271 
2272 	if (adev->gfx.rlc.funcs) {
2273 		if (adev->gfx.rlc.funcs->init) {
2274 			r = adev->gfx.rlc.funcs->init(adev);
2275 			if (r) {
2276 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2277 				return r;
2278 			}
2279 		}
2280 	}
2281 
2282 	r = gfx_v9_0_mec_init(adev);
2283 	if (r) {
2284 		DRM_ERROR("Failed to init MEC BOs!\n");
2285 		return r;
2286 	}
2287 
2288 	/* set up the gfx ring */
2289 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2290 		ring = &adev->gfx.gfx_ring[i];
2291 		ring->ring_obj = NULL;
2292 		if (!i)
2293 			sprintf(ring->name, "gfx");
2294 		else
2295 			sprintf(ring->name, "gfx_%d", i);
2296 		ring->use_doorbell = true;
2297 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2298 
2299 		/* disable scheduler on the real ring */
2300 		ring->no_scheduler = adev->gfx.mcbp;
2301 		ring->vm_hub = AMDGPU_GFXHUB(0);
2302 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2303 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2304 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2305 		if (r)
2306 			return r;
2307 	}
2308 
2309 	/* set up the software rings */
2310 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2311 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2312 			ring = &adev->gfx.sw_gfx_ring[i];
2313 			ring->ring_obj = NULL;
2314 			sprintf(ring->name, amdgpu_sw_ring_name(i));
2315 			ring->use_doorbell = true;
2316 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2317 			ring->is_sw_ring = true;
2318 			hw_prio = amdgpu_sw_ring_priority(i);
2319 			ring->vm_hub = AMDGPU_GFXHUB(0);
2320 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2321 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2322 					     NULL);
2323 			if (r)
2324 				return r;
2325 			ring->wptr = 0;
2326 		}
2327 
2328 		/* init the muxer and add software rings */
2329 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2330 					 GFX9_NUM_SW_GFX_RINGS);
2331 		if (r) {
2332 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2333 			return r;
2334 		}
2335 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2336 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2337 							&adev->gfx.sw_gfx_ring[i]);
2338 			if (r) {
2339 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2340 				return r;
2341 			}
2342 		}
2343 	}
2344 
2345 	/* set up the compute queues - allocate horizontally across pipes */
2346 	ring_id = 0;
2347 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2348 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2349 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2350 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2351 								     k, j))
2352 					continue;
2353 
2354 				r = gfx_v9_0_compute_ring_init(adev,
2355 							       ring_id,
2356 							       i, k, j);
2357 				if (r)
2358 					return r;
2359 
2360 				ring_id++;
2361 			}
2362 		}
2363 	}
2364 
2365 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2366 	if (r) {
2367 		DRM_ERROR("Failed to init KIQ BOs!\n");
2368 		return r;
2369 	}
2370 
2371 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2372 	if (r)
2373 		return r;
2374 
2375 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2376 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2377 	if (r)
2378 		return r;
2379 
2380 	adev->gfx.ce_ram_size = 0x8000;
2381 
2382 	r = gfx_v9_0_gpu_early_init(adev);
2383 	if (r)
2384 		return r;
2385 
2386 	if (amdgpu_gfx_ras_sw_init(adev)) {
2387 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2388 		return -EINVAL;
2389 	}
2390 
2391 	gfx_v9_0_alloc_ip_dump(adev);
2392 
2393 	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
2394 	if (r)
2395 		return r;
2396 
2397 	return 0;
2398 }
2399 
2400 
2401 static int gfx_v9_0_sw_fini(void *handle)
2402 {
2403 	int i;
2404 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2405 
2406 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2407 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2408 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2409 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2410 	}
2411 
2412 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2413 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2414 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2415 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2416 
2417 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2418 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2419 	amdgpu_gfx_kiq_fini(adev, 0);
2420 
2421 	gfx_v9_0_mec_fini(adev);
2422 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2423 				&adev->gfx.rlc.clear_state_gpu_addr,
2424 				(void **)&adev->gfx.rlc.cs_ptr);
2425 	if (adev->flags & AMD_IS_APU) {
2426 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2427 				&adev->gfx.rlc.cp_table_gpu_addr,
2428 				(void **)&adev->gfx.rlc.cp_table_ptr);
2429 	}
2430 	gfx_v9_0_free_microcode(adev);
2431 
2432 	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
2433 
2434 	kfree(adev->gfx.ip_dump_core);
2435 	kfree(adev->gfx.ip_dump_compute_queues);
2436 
2437 	return 0;
2438 }
2439 
2440 
2441 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2442 {
2443 	/* TODO */
2444 }
2445 
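/*
 * Point GRBM_GFX_INDEX at a specific SE/SH/instance, or at all of them
 * when 0xffffffff is passed, so that subsequent banked register accesses
 * target the intended slice.
 */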
2446 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2447 			   u32 instance, int xcc_id)
2448 {
2449 	u32 data;
2450 
2451 	if (instance == 0xffffffff)
2452 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2453 	else
2454 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2455 
2456 	if (se_num == 0xffffffff)
2457 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2458 	else
2459 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2460 
2461 	if (sh_num == 0xffffffff)
2462 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2463 	else
2464 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2465 
2466 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2467 }
2468 
2469 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2470 {
2471 	u32 data, mask;
2472 
2473 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2474 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2475 
2476 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2477 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2478 
2479 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2480 					 adev->gfx.config.max_sh_per_se);
2481 
2482 	return (~data) & mask;
2483 }
2484 
2485 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2486 {
2487 	int i, j;
2488 	u32 data;
2489 	u32 active_rbs = 0;
2490 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2491 					adev->gfx.config.max_sh_per_se;
2492 
2493 	mutex_lock(&adev->grbm_idx_mutex);
2494 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2495 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2496 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2497 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2498 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2499 					       rb_bitmap_width_per_sh);
2500 		}
2501 	}
2502 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2503 	mutex_unlock(&adev->grbm_idx_mutex);
2504 
2505 	adev->gfx.config.backend_enable_mask = active_rbs;
2506 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2507 }
2508 
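/*
 * Enable the SPI debug trap for the VMID range [first_vmid, last_vmid)
 * and clear the trap mask/data registers.
 */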
2509 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2510 				uint32_t first_vmid,
2511 				uint32_t last_vmid)
2512 {
2513 	uint32_t data;
2514 	uint32_t trap_config_vmid_mask = 0;
2515 	int i;
2516 
2517 	/* Calculate trap config vmid mask */
2518 	for (i = first_vmid; i < last_vmid; i++)
2519 		trap_config_vmid_mask |= (1 << i);
2520 
2521 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2522 			VMID_SEL, trap_config_vmid_mask);
2523 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2524 			TRAP_EN, 1);
2525 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2526 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2527 
2528 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2529 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2530 }
2531 
2532 #define DEFAULT_SH_MEM_BASES	(0x6000)
2533 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2534 {
2535 	int i;
2536 	uint32_t sh_mem_config;
2537 	uint32_t sh_mem_bases;
2538 
2539 	/*
2540 	 * Configure apertures:
2541 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2542 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2543 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2544 	 */
2545 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2546 
2547 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2548 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2549 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2550 
2551 	mutex_lock(&adev->srbm_mutex);
2552 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2553 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2554 		/* CP and shaders */
2555 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2556 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2557 	}
2558 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2559 	mutex_unlock(&adev->srbm_mutex);
2560 
2561 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2562 	   access. These should be enabled by FW for target VMIDs. */
2563 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2564 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2565 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2566 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2567 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2568 	}
2569 }
2570 
2571 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2572 {
2573 	int vmid;
2574 
2575 	/*
2576 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2577 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2578 	 * the driver can enable them for graphics. VMID0 should maintain
2579 	 * access so that HWS firmware can save/restore entries.
2580 	 */
2581 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2582 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2583 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2584 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2585 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2586 	}
2587 }
2588 
2589 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2590 {
2591 	uint32_t tmp;
2592 
2593 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2594 	case IP_VERSION(9, 4, 1):
2595 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2596 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2597 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2598 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2599 		break;
2600 	default:
2601 		break;
2602 	}
2603 }
2604 
2605 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2606 {
2607 	u32 tmp;
2608 	int i;
2609 
2610 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2611 
2612 	gfx_v9_0_tiling_mode_table_init(adev);
2613 
2614 	if (adev->gfx.num_gfx_rings)
2615 		gfx_v9_0_setup_rb(adev);
2616 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2617 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2618 
2619 	/* XXX SH_MEM regs */
2620 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2621 	mutex_lock(&adev->srbm_mutex);
2622 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2623 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2624 		/* CP and shaders */
2625 		if (i == 0) {
2626 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2627 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2628 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2629 					    !!adev->gmc.noretry);
2630 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2631 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2632 		} else {
2633 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2634 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2635 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2636 					    !!adev->gmc.noretry);
2637 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2638 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2639 				(adev->gmc.private_aperture_start >> 48));
2640 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2641 				(adev->gmc.shared_aperture_start >> 48));
2642 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2643 		}
2644 	}
2645 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2646 
2647 	mutex_unlock(&adev->srbm_mutex);
2648 
2649 	gfx_v9_0_init_compute_vmid(adev);
2650 	gfx_v9_0_init_gds_vmid(adev);
2651 	gfx_v9_0_init_sq_config(adev);
2652 }
2653 
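/*
 * Wait for the RLC serdes CU masters on every SE/SH, and then the non-CU
 * masters, to go idle, giving up after adev->usec_timeout microseconds
 * per poll loop.
 */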
2654 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2655 {
2656 	u32 i, j, k;
2657 	u32 mask;
2658 
2659 	mutex_lock(&adev->grbm_idx_mutex);
2660 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2661 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2662 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2663 			for (k = 0; k < adev->usec_timeout; k++) {
2664 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2665 					break;
2666 				udelay(1);
2667 			}
2668 			if (k == adev->usec_timeout) {
2669 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2670 						      0xffffffff, 0xffffffff, 0);
2671 				mutex_unlock(&adev->grbm_idx_mutex);
2672 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2673 					 i, j);
2674 				return;
2675 			}
2676 		}
2677 	}
2678 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2679 	mutex_unlock(&adev->grbm_idx_mutex);
2680 
2681 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2682 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2683 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2684 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2685 	for (k = 0; k < adev->usec_timeout; k++) {
2686 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2687 			break;
2688 		udelay(1);
2689 	}
2690 }
2691 
2692 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2693 					       bool enable)
2694 {
2695 	u32 tmp;
2696 
2697 	/* These interrupts should be enabled to drive DS clock */
2698 
2699 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2700 
2701 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2702 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2703 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2704 	if (adev->gfx.num_gfx_rings)
2705 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2706 
2707 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2708 }
2709 
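/*
 * gfx_v9_0_init_csb - regenerate the clear state buffer and point the RLC
 * CSIB address/length registers at it.
 */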
2710 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2711 {
2712 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2713 	/* csib */
2714 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2715 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2716 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2717 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2718 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2719 			adev->gfx.rlc.clear_state_size);
2720 }
2721 
2722 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2723 				int indirect_offset,
2724 				int list_size,
2725 				int *unique_indirect_regs,
2726 				int unique_indirect_reg_count,
2727 				int *indirect_start_offsets,
2728 				int *indirect_start_offsets_count,
2729 				int max_start_offsets_count)
2730 {
2731 	int idx;
2732 
2733 	for (; indirect_offset < list_size; indirect_offset++) {
2734 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2735 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2736 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2737 
2738 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2739 			indirect_offset += 2;
2740 
2741 			/* look for the matching index */
2742 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2743 				if (unique_indirect_regs[idx] ==
2744 					register_list_format[indirect_offset] ||
2745 					!unique_indirect_regs[idx])
2746 					break;
2747 			}
2748 
2749 			BUG_ON(idx >= unique_indirect_reg_count);
2750 
2751 			if (!unique_indirect_regs[idx])
2752 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2753 
2754 			indirect_offset++;
2755 		}
2756 	}
2757 }
2758 
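/*
 * gfx_v9_1_init_rlc_save_restore_list - parse the register list format from
 * the RLC firmware, program the register restore table into RLC ARAM, and
 * load the direct/indirect register lists and their start offsets into the
 * RLC GPM scratch RAM.  Returns -ENOMEM if the temporary list copy fails.
 */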
2759 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2760 {
2761 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2762 	int unique_indirect_reg_count = 0;
2763 
2764 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2765 	int indirect_start_offsets_count = 0;
2766 
2767 	int list_size = 0;
2768 	int i = 0, j = 0;
2769 	u32 tmp = 0;
2770 
2771 	u32 *register_list_format =
2772 		kmemdup(adev->gfx.rlc.register_list_format,
2773 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2774 	if (!register_list_format)
2775 		return -ENOMEM;
2776 
2777 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2778 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2779 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2780 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2781 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2782 				    unique_indirect_regs,
2783 				    unique_indirect_reg_count,
2784 				    indirect_start_offsets,
2785 				    &indirect_start_offsets_count,
2786 				    ARRAY_SIZE(indirect_start_offsets));
2787 
2788 	/* enable auto inc in case it is disabled */
2789 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2790 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2791 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2792 
2793 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2794 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2795 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2796 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2797 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2798 			adev->gfx.rlc.register_restore[i]);
2799 
2800 	/* load indirect register */
2801 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2802 		adev->gfx.rlc.reg_list_format_start);
2803 
2804 	/* direct register portion */
2805 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2806 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2807 			register_list_format[i]);
2808 
2809 	/* indirect register portion */
2810 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2811 		if (register_list_format[i] == 0xFFFFFFFF) {
2812 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2813 			continue;
2814 		}
2815 
2816 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2817 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2818 
2819 		for (j = 0; j < unique_indirect_reg_count; j++) {
2820 			if (register_list_format[i] == unique_indirect_regs[j]) {
2821 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2822 				break;
2823 			}
2824 		}
2825 
2826 		BUG_ON(j >= unique_indirect_reg_count);
2827 
2828 		i++;
2829 	}
2830 
2831 	/* set save/restore list size */
2832 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2833 	list_size = list_size >> 1;
2834 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2835 		adev->gfx.rlc.reg_restore_list_size);
2836 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2837 
2838 	/* write the starting offsets to RLC scratch ram */
2839 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2840 		adev->gfx.rlc.starting_offsets_start);
2841 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2842 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2843 		       indirect_start_offsets[i]);
2844 
2845 	/* load unique indirect regs*/
2846 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2847 		if (unique_indirect_regs[i] != 0) {
2848 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2849 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2850 			       unique_indirect_regs[i] & 0x3FFFF);
2851 
2852 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2853 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2854 			       unique_indirect_regs[i] >> 20);
2855 		}
2856 	}
2857 
2858 	kfree(register_list_format);
2859 	return 0;
2860 }
2861 
2862 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2863 {
2864 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2865 }
2866 
2867 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2868 					     bool enable)
2869 {
2870 	uint32_t data = 0;
2871 	uint32_t default_data = 0;
2872 
2873 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2874 	if (enable) {
2875 		/* enable GFXIP control over CGPG */
2876 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2877 		if (default_data != data)
2878 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2879 
2880 		/* update status */
2881 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2882 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2883 		if (default_data != data)
2884 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2885 	} else {
2886 		/* restore GFXIP control over CGPG */
2887 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2888 		if (default_data != data)
2889 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2890 	}
2891 }
2892 
2893 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2894 {
2895 	uint32_t data = 0;
2896 
2897 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2898 			      AMD_PG_SUPPORT_GFX_SMG |
2899 			      AMD_PG_SUPPORT_GFX_DMG)) {
2900 		/* init IDLE_POLL_COUNT = 60 */
2901 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2902 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2903 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2904 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2905 
2906 		/* init RLC PG Delay */
2907 		data = 0;
2908 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2909 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2910 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2911 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2912 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2913 
2914 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2915 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2916 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2917 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2918 
2919 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2920 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2921 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2922 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2923 
2924 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2925 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2926 
2927 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2928 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2929 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2930 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2931 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2932 	}
2933 }
2934 
2935 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2936 						bool enable)
2937 {
2938 	uint32_t data = 0;
2939 	uint32_t default_data = 0;
2940 
2941 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2942 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2943 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2944 			     enable ? 1 : 0);
2945 	if (default_data != data)
2946 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2947 }
2948 
2949 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2950 						bool enable)
2951 {
2952 	uint32_t data = 0;
2953 	uint32_t default_data = 0;
2954 
2955 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2956 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2957 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2958 			     enable ? 1 : 0);
2959 	if (default_data != data)
2960 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2961 }
2962 
2963 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2964 					bool enable)
2965 {
2966 	uint32_t data = 0;
2967 	uint32_t default_data = 0;
2968 
2969 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2970 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2971 			     CP_PG_DISABLE,
2972 			     enable ? 0 : 1);
2973 	if (default_data != data)
2974 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2975 }
2976 
2977 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2978 						bool enable)
2979 {
2980 	uint32_t data, default_data;
2981 
2982 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2983 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2984 			     GFX_POWER_GATING_ENABLE,
2985 			     enable ? 1 : 0);
2986 	if (default_data != data)
2987 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2988 }
2989 
2990 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2991 						bool enable)
2992 {
2993 	uint32_t data, default_data;
2994 
2995 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2996 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2997 			     GFX_PIPELINE_PG_ENABLE,
2998 			     enable ? 1 : 0);
2999 	if (default_data != data)
3000 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3001 
3002 	if (!enable)
3003 		/* read any GFX register to wake up GFX */
3004 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3005 }
3006 
3007 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3008 						       bool enable)
3009 {
3010 	uint32_t data, default_data;
3011 
3012 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3013 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3014 			     STATIC_PER_CU_PG_ENABLE,
3015 			     enable ? 1 : 0);
3016 	if (default_data != data)
3017 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3018 }
3019 
3020 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3021 						bool enable)
3022 {
3023 	uint32_t data, default_data;
3024 
3025 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3026 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3027 			     DYN_PER_CU_PG_ENABLE,
3028 			     enable ? 1 : 0);
3029 	if (default_data != data)
3030 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3031 }
3032 
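/*
 * gfx_v9_0_init_pg - set up the clear state buffer, the RLC save/restore
 * machine (for RLC v2.1 firmware), and, when power gating is supported,
 * the RLC jump table and the GFX power gating registers.
 */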
3033 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3034 {
3035 	gfx_v9_0_init_csb(adev);
3036 
3037 	/*
3038 	 * Rlc save restore list is workable since v2_1.
3039 	 * And it's needed by gfxoff feature.
3040 	 */
3041 	if (adev->gfx.rlc.is_rlc_v2_1) {
3042 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3043 			    IP_VERSION(9, 2, 1) ||
3044 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3045 			gfx_v9_1_init_rlc_save_restore_list(adev);
3046 		gfx_v9_0_enable_save_restore_machine(adev);
3047 	}
3048 
3049 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3050 			      AMD_PG_SUPPORT_GFX_SMG |
3051 			      AMD_PG_SUPPORT_GFX_DMG |
3052 			      AMD_PG_SUPPORT_CP |
3053 			      AMD_PG_SUPPORT_GDS |
3054 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3055 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3056 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3057 		gfx_v9_0_init_gfx_power_gating(adev);
3058 	}
3059 }
3060 
3061 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3062 {
3063 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3064 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3065 	gfx_v9_0_wait_for_rlc_serdes(adev);
3066 }
3067 
3068 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3069 {
3070 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3071 	udelay(50);
3072 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3073 	udelay(50);
3074 }
3075 
3076 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3077 {
3078 #ifdef AMDGPU_RLC_DEBUG_RETRY
3079 	u32 rlc_ucode_ver;
3080 #endif
3081 
3082 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3083 	udelay(50);
3084 
3085 	/* on APUs (e.g. Carrizo), the CP interrupt is enabled only after the CP is initialized */
3086 	if (!(adev->flags & AMD_IS_APU)) {
3087 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3088 		udelay(50);
3089 	}
3090 
3091 #ifdef AMDGPU_RLC_DEBUG_RETRY
3092 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3093 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3094 	if (rlc_ucode_ver == 0x108) {
3095 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3096 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3097 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3098 		 * default is 0x9C4 to create a 100us interval */
3099 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3100 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3101 		 * to disable the page fault retry interrupts, default is
3102 		 * 0x100 (256) */
3103 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3104 	}
3105 #endif
3106 }
3107 
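/*
 * gfx_v9_0_rlc_load_microcode - legacy (non-PSP) RLC firmware load: write
 * the RLCG ucode words through RLC_GPM_UCODE_ADDR/DATA and latch the
 * firmware version.
 */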
3108 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3109 {
3110 	const struct rlc_firmware_header_v2_0 *hdr;
3111 	const __le32 *fw_data;
3112 	unsigned i, fw_size;
3113 
3114 	if (!adev->gfx.rlc_fw)
3115 		return -EINVAL;
3116 
3117 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3118 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3119 
3120 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3121 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3122 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3123 
3124 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3125 			RLCG_UCODE_LOADING_START_ADDRESS);
3126 	for (i = 0; i < fw_size; i++)
3127 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3128 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3129 
3130 	return 0;
3131 }
3132 
3133 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3134 {
3135 	int r;
3136 
3137 	if (amdgpu_sriov_vf(adev)) {
3138 		gfx_v9_0_init_csb(adev);
3139 		return 0;
3140 	}
3141 
3142 	adev->gfx.rlc.funcs->stop(adev);
3143 
3144 	/* disable CG */
3145 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3146 
3147 	gfx_v9_0_init_pg(adev);
3148 
3149 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3150 		/* legacy rlc firmware loading */
3151 		r = gfx_v9_0_rlc_load_microcode(adev);
3152 		if (r)
3153 			return r;
3154 	}
3155 
3156 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3157 	case IP_VERSION(9, 2, 2):
3158 	case IP_VERSION(9, 1, 0):
3159 		gfx_v9_0_init_lbpw(adev);
3160 		if (amdgpu_lbpw == 0)
3161 			gfx_v9_0_enable_lbpw(adev, false);
3162 		else
3163 			gfx_v9_0_enable_lbpw(adev, true);
3164 		break;
3165 	case IP_VERSION(9, 4, 0):
3166 		gfx_v9_4_init_lbpw(adev);
3167 		if (amdgpu_lbpw > 0)
3168 			gfx_v9_0_enable_lbpw(adev, true);
3169 		else
3170 			gfx_v9_0_enable_lbpw(adev, false);
3171 		break;
3172 	default:
3173 		break;
3174 	}
3175 
3176 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3177 
3178 	adev->gfx.rlc.funcs->start(adev);
3179 
3180 	return 0;
3181 }
3182 
3183 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3184 {
3185 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3186 
3187 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3188 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3189 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3190 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3191 	udelay(50);
3192 }
3193 
3194 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3195 {
3196 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3197 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3198 	const struct gfx_firmware_header_v1_0 *me_hdr;
3199 	const __le32 *fw_data;
3200 	unsigned i, fw_size;
3201 
3202 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3203 		return -EINVAL;
3204 
3205 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3206 		adev->gfx.pfp_fw->data;
3207 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3208 		adev->gfx.ce_fw->data;
3209 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3210 		adev->gfx.me_fw->data;
3211 
3212 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3213 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3214 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3215 
3216 	gfx_v9_0_cp_gfx_enable(adev, false);
3217 
3218 	/* PFP */
3219 	fw_data = (const __le32 *)
3220 		(adev->gfx.pfp_fw->data +
3221 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3222 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3223 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3224 	for (i = 0; i < fw_size; i++)
3225 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3226 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3227 
3228 	/* CE */
3229 	fw_data = (const __le32 *)
3230 		(adev->gfx.ce_fw->data +
3231 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3232 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3233 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3234 	for (i = 0; i < fw_size; i++)
3235 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3236 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3237 
3238 	/* ME */
3239 	fw_data = (const __le32 *)
3240 		(adev->gfx.me_fw->data +
3241 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3242 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3243 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3244 	for (i = 0; i < fw_size; i++)
3245 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3246 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3247 
3248 	return 0;
3249 }
3250 
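/*
 * gfx_v9_0_cp_gfx_start - initialize the gfx CP and submit the clear state
 * (CSB) preamble packets on gfx ring 0.  On gfx9 APUs resuming from S3 with
 * an incomplete suspend, the CSB resubmit is skipped (see the quirk below).
 */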
3251 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3252 {
3253 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3254 	const struct cs_section_def *sect = NULL;
3255 	const struct cs_extent_def *ext = NULL;
3256 	int r, i, tmp;
3257 
3258 	/* init the CP */
3259 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3260 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3261 
3262 	gfx_v9_0_cp_gfx_enable(adev, true);
3263 
3264 	/* Limit this quirk to the gfx9 APU series; it has already been
3265 	 * confirmed that the gfx10/gfx11 APUs don't need such an update.
3266 	 */
3267 	if (adev->flags & AMD_IS_APU &&
3268 			adev->in_s3 && !adev->suspend_complete) {
3269 		DRM_INFO("Will skip the CSB packet resubmit\n");
3270 		return 0;
3271 	}
3272 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3273 	if (r) {
3274 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3275 		return r;
3276 	}
3277 
3278 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3279 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3280 
3281 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3282 	amdgpu_ring_write(ring, 0x80000000);
3283 	amdgpu_ring_write(ring, 0x80000000);
3284 
3285 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3286 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3287 			if (sect->id == SECT_CONTEXT) {
3288 				amdgpu_ring_write(ring,
3289 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3290 					       ext->reg_count));
3291 				amdgpu_ring_write(ring,
3292 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3293 				for (i = 0; i < ext->reg_count; i++)
3294 					amdgpu_ring_write(ring, ext->extent[i]);
3295 			}
3296 		}
3297 	}
3298 
3299 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3300 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3301 
3302 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3303 	amdgpu_ring_write(ring, 0);
3304 
3305 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3306 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3307 	amdgpu_ring_write(ring, 0x8000);
3308 	amdgpu_ring_write(ring, 0x8000);
3309 
3310 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3311 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3312 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3313 	amdgpu_ring_write(ring, tmp);
3314 	amdgpu_ring_write(ring, 0);
3315 
3316 	amdgpu_ring_commit(ring);
3317 
3318 	return 0;
3319 }
3320 
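/*
 * gfx_v9_0_cp_gfx_resume - program the gfx ring buffer registers for gfx
 * ring 0 (size, base, rptr/wptr addresses, doorbell range) and then kick
 * off gfx_v9_0_cp_gfx_start().
 */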
3321 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3322 {
3323 	struct amdgpu_ring *ring;
3324 	u32 tmp;
3325 	u32 rb_bufsz;
3326 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3327 
3328 	/* Set the write pointer delay */
3329 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3330 
3331 	/* set the RB to use vmid 0 */
3332 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3333 
3334 	/* Set ring buffer size */
3335 	ring = &adev->gfx.gfx_ring[0];
3336 	rb_bufsz = order_base_2(ring->ring_size / 8);
3337 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3338 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3339 #ifdef __BIG_ENDIAN
3340 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3341 #endif
3342 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3343 
3344 	/* Initialize the ring buffer's write pointers */
3345 	ring->wptr = 0;
3346 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3347 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3348 
3349 	/* set the wb address whether it's enabled or not */
3350 	rptr_addr = ring->rptr_gpu_addr;
3351 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3352 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3353 
3354 	wptr_gpu_addr = ring->wptr_gpu_addr;
3355 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3356 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3357 
3358 	mdelay(1);
3359 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3360 
3361 	rb_addr = ring->gpu_addr >> 8;
3362 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3363 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3364 
3365 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3366 	if (ring->use_doorbell) {
3367 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3368 				    DOORBELL_OFFSET, ring->doorbell_index);
3369 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3370 				    DOORBELL_EN, 1);
3371 	} else {
3372 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3373 	}
3374 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3375 
3376 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3377 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3378 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3379 
3380 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3381 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3382 
3383 
3384 	/* start the ring */
3385 	gfx_v9_0_cp_gfx_start(adev);
3386 
3387 	return 0;
3388 }
3389 
3390 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3391 {
3392 	if (enable) {
3393 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3394 	} else {
3395 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3396 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3397 		adev->gfx.kiq[0].ring.sched.ready = false;
3398 	}
3399 	udelay(50);
3400 }
3401 
3402 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3403 {
3404 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3405 	const __le32 *fw_data;
3406 	unsigned i;
3407 	u32 tmp;
3408 
3409 	if (!adev->gfx.mec_fw)
3410 		return -EINVAL;
3411 
3412 	gfx_v9_0_cp_compute_enable(adev, false);
3413 
3414 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3415 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3416 
3417 	fw_data = (const __le32 *)
3418 		(adev->gfx.mec_fw->data +
3419 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3420 	tmp = 0;
3421 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3422 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3423 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3424 
3425 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3426 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3427 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3428 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3429 
3430 	/* MEC1 */
3431 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3432 			 mec_hdr->jt_offset);
3433 	for (i = 0; i < mec_hdr->jt_size; i++)
3434 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3435 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3436 
3437 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3438 			adev->gfx.mec_fw_version);
3439 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3440 
3441 	return 0;
3442 }
3443 
3444 /* KIQ functions */
3445 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3446 {
3447 	uint32_t tmp;
3448 	struct amdgpu_device *adev = ring->adev;
3449 
3450 	/* tell RLC which queue is the KIQ queue */
3451 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3452 	tmp &= 0xffffff00;
3453 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3454 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3455 	tmp |= 0x80;
3456 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3457 }
3458 
3459 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3460 {
3461 	struct amdgpu_device *adev = ring->adev;
3462 
3463 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3464 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3465 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3466 			mqd->cp_hqd_queue_priority =
3467 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3468 		}
3469 	}
3470 }
3471 
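/*
 * gfx_v9_0_mqd_init - fill the v9 MQD in memory from the ring state: EOP
 * buffer, doorbell, MQD/HQD base addresses, PQ control, rptr/wptr report
 * addresses and queue priority.  Only the KIQ marks the queue active here;
 * compute queues are activated through the map_queues packet instead.
 */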
3472 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3473 {
3474 	struct amdgpu_device *adev = ring->adev;
3475 	struct v9_mqd *mqd = ring->mqd_ptr;
3476 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3477 	uint32_t tmp;
3478 
3479 	mqd->header = 0xC0310800;
3480 	mqd->compute_pipelinestat_enable = 0x00000001;
3481 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3482 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3483 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3484 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3485 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3486 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3487 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3488 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3489 	mqd->compute_misc_reserved = 0x00000003;
3490 
3491 	mqd->dynamic_cu_mask_addr_lo =
3492 		lower_32_bits(ring->mqd_gpu_addr
3493 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3494 	mqd->dynamic_cu_mask_addr_hi =
3495 		upper_32_bits(ring->mqd_gpu_addr
3496 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3497 
3498 	eop_base_addr = ring->eop_gpu_addr >> 8;
3499 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3500 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3501 
3502 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3503 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3504 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3505 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3506 
3507 	mqd->cp_hqd_eop_control = tmp;
3508 
3509 	/* enable doorbell? */
3510 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3511 
3512 	if (ring->use_doorbell) {
3513 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3514 				    DOORBELL_OFFSET, ring->doorbell_index);
3515 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3516 				    DOORBELL_EN, 1);
3517 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518 				    DOORBELL_SOURCE, 0);
3519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520 				    DOORBELL_HIT, 0);
3521 	} else {
3522 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3523 					 DOORBELL_EN, 0);
3524 	}
3525 
3526 	mqd->cp_hqd_pq_doorbell_control = tmp;
3527 
3528 	/* disable the queue if it's active */
3529 	ring->wptr = 0;
3530 	mqd->cp_hqd_dequeue_request = 0;
3531 	mqd->cp_hqd_pq_rptr = 0;
3532 	mqd->cp_hqd_pq_wptr_lo = 0;
3533 	mqd->cp_hqd_pq_wptr_hi = 0;
3534 
3535 	/* set the pointer to the MQD */
3536 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3537 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3538 
3539 	/* set MQD vmid to 0 */
3540 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3541 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3542 	mqd->cp_mqd_control = tmp;
3543 
3544 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3545 	hqd_gpu_addr = ring->gpu_addr >> 8;
3546 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3547 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3548 
3549 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3550 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3551 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3552 			    (order_base_2(ring->ring_size / 4) - 1));
3553 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3554 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3555 #ifdef __BIG_ENDIAN
3556 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3557 #endif
3558 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3559 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3560 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3561 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3562 	mqd->cp_hqd_pq_control = tmp;
3563 
3564 	/* set the wb address whether it's enabled or not */
3565 	wb_gpu_addr = ring->rptr_gpu_addr;
3566 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3567 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3568 		upper_32_bits(wb_gpu_addr) & 0xffff;
3569 
3570 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3571 	wb_gpu_addr = ring->wptr_gpu_addr;
3572 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3573 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3574 
3575 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3576 	ring->wptr = 0;
3577 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3578 
3579 	/* set the vmid for the queue */
3580 	mqd->cp_hqd_vmid = 0;
3581 
3582 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3583 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3584 	mqd->cp_hqd_persistent_state = tmp;
3585 
3586 	/* set MIN_IB_AVAIL_SIZE */
3587 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3588 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3589 	mqd->cp_hqd_ib_control = tmp;
3590 
3591 	/* set static priority for a queue/ring */
3592 	gfx_v9_0_mqd_set_priority(ring, mqd);
3593 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3594 
3595 	/* The map_queues packet doesn't need to activate the queue,
3596 	 * so only the KIQ needs to set this field.
3597 	 */
3598 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3599 		mqd->cp_hqd_active = 1;
3600 
3601 	return 0;
3602 }
3603 
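/*
 * gfx_v9_0_kiq_init_register - commit the KIQ MQD fields to the HQD
 * registers.  The caller must hold srbm_mutex and have selected the target
 * me/pipe/queue via soc15_grbm_select().
 */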
3604 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3605 {
3606 	struct amdgpu_device *adev = ring->adev;
3607 	struct v9_mqd *mqd = ring->mqd_ptr;
3608 	int j;
3609 
3610 	/* disable wptr polling */
3611 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3612 
3613 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3614 	       mqd->cp_hqd_eop_base_addr_lo);
3615 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3616 	       mqd->cp_hqd_eop_base_addr_hi);
3617 
3618 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3620 	       mqd->cp_hqd_eop_control);
3621 
3622 	/* enable doorbell? */
3623 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3624 	       mqd->cp_hqd_pq_doorbell_control);
3625 
3626 	/* disable the queue if it's active */
3627 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3628 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3629 		for (j = 0; j < adev->usec_timeout; j++) {
3630 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3631 				break;
3632 			udelay(1);
3633 		}
3634 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3635 		       mqd->cp_hqd_dequeue_request);
3636 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3637 		       mqd->cp_hqd_pq_rptr);
3638 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3639 		       mqd->cp_hqd_pq_wptr_lo);
3640 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3641 		       mqd->cp_hqd_pq_wptr_hi);
3642 	}
3643 
3644 	/* set the pointer to the MQD */
3645 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3646 	       mqd->cp_mqd_base_addr_lo);
3647 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3648 	       mqd->cp_mqd_base_addr_hi);
3649 
3650 	/* set MQD vmid to 0 */
3651 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3652 	       mqd->cp_mqd_control);
3653 
3654 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3655 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3656 	       mqd->cp_hqd_pq_base_lo);
3657 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3658 	       mqd->cp_hqd_pq_base_hi);
3659 
3660 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3661 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3662 	       mqd->cp_hqd_pq_control);
3663 
3664 	/* set the wb address whether it's enabled or not */
3665 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3666 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3667 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3668 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3669 
3670 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3671 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3672 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3673 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3674 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3675 
3676 	/* enable the doorbell if requested */
3677 	if (ring->use_doorbell) {
3678 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3679 					(adev->doorbell_index.kiq * 2) << 2);
3680 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3681 		 * doesn't wake GC up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3682 		 * around this issue, and this change has to align with the firmware
3683 		 * update.
3684 		 */
3685 		if (check_if_enlarge_doorbell_range(adev))
3686 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3687 					(adev->doorbell.size - 4));
3688 		else
3689 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3690 					(adev->doorbell_index.userqueue_end * 2) << 2);
3691 	}
3692 
3693 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3694 	       mqd->cp_hqd_pq_doorbell_control);
3695 
3696 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3697 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3698 	       mqd->cp_hqd_pq_wptr_lo);
3699 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3700 	       mqd->cp_hqd_pq_wptr_hi);
3701 
3702 	/* set the vmid for the queue */
3703 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3704 
3705 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3706 	       mqd->cp_hqd_persistent_state);
3707 
3708 	/* activate the queue */
3709 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3710 	       mqd->cp_hqd_active);
3711 
3712 	if (ring->use_doorbell)
3713 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3714 
3715 	return 0;
3716 }
3717 
3718 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3719 {
3720 	struct amdgpu_device *adev = ring->adev;
3721 	int j;
3722 
3723 	/* disable the queue if it's active */
3724 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3725 
3726 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3727 
3728 		for (j = 0; j < adev->usec_timeout; j++) {
3729 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3730 				break;
3731 			udelay(1);
3732 		}
3733 
3734 		if (j == adev->usec_timeout) {
3735 			DRM_DEBUG("KIQ dequeue request failed.\n");
3736 
3737 			/* Manual disable if dequeue request times out */
3738 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3739 		}
3740 
3741 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3742 		      0);
3743 	}
3744 
3745 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3746 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3747 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3748 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3749 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3750 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3751 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3752 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3753 
3754 	return 0;
3755 }
3756 
3757 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3758 {
3759 	struct amdgpu_device *adev = ring->adev;
3760 	struct v9_mqd *mqd = ring->mqd_ptr;
3761 	struct v9_mqd *tmp_mqd;
3762 
3763 	gfx_v9_0_kiq_setting(ring);
3764 
3765 	/* The GPU could be in a bad state during probe: the driver triggers a
3766 	 * reset after loading the SMU, and in that case the MQD has not been
3767 	 * initialized, so it needs to be re-initialized here.
3768 	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3769 	 */
3770 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3771 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3772 		/* for GPU_RESET case , reset MQD to a clean status */
3773 		if (adev->gfx.kiq[0].mqd_backup)
3774 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3775 
3776 		/* reset ring buffer */
3777 		ring->wptr = 0;
3778 		amdgpu_ring_clear_ring(ring);
3779 
3780 		mutex_lock(&adev->srbm_mutex);
3781 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3782 		gfx_v9_0_kiq_init_register(ring);
3783 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3784 		mutex_unlock(&adev->srbm_mutex);
3785 	} else {
3786 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3787 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3788 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3789 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3790 			amdgpu_ring_clear_ring(ring);
3791 		mutex_lock(&adev->srbm_mutex);
3792 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3793 		gfx_v9_0_mqd_init(ring);
3794 		gfx_v9_0_kiq_init_register(ring);
3795 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3796 		mutex_unlock(&adev->srbm_mutex);
3797 
3798 		if (adev->gfx.kiq[0].mqd_backup)
3799 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3800 	}
3801 
3802 	return 0;
3803 }
3804 
3805 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3806 {
3807 	struct amdgpu_device *adev = ring->adev;
3808 	struct v9_mqd *mqd = ring->mqd_ptr;
3809 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3810 	struct v9_mqd *tmp_mqd;
3811 
3812 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3813 	 * mqd->cp_hqd_pq_control has not been initialized before.
3814 	 */
3815 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3816 
3817 	if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3818 	    (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3819 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3820 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3821 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3822 		mutex_lock(&adev->srbm_mutex);
3823 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3824 		gfx_v9_0_mqd_init(ring);
3825 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3826 		mutex_unlock(&adev->srbm_mutex);
3827 
3828 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3829 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3830 	} else {
3831 		/* restore MQD to a clean status */
3832 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3833 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3834 		/* reset ring buffer */
3835 		ring->wptr = 0;
3836 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3837 		amdgpu_ring_clear_ring(ring);
3838 	}
3839 
3840 	return 0;
3841 }
3842 
3843 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3844 {
3845 	struct amdgpu_ring *ring;
3846 	int r;
3847 
3848 	ring = &adev->gfx.kiq[0].ring;
3849 
3850 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3851 	if (unlikely(r != 0))
3852 		return r;
3853 
3854 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3855 	if (unlikely(r != 0)) {
3856 		amdgpu_bo_unreserve(ring->mqd_obj);
3857 		return r;
3858 	}
3859 
3860 	gfx_v9_0_kiq_init_queue(ring);
3861 	amdgpu_bo_kunmap(ring->mqd_obj);
3862 	ring->mqd_ptr = NULL;
3863 	amdgpu_bo_unreserve(ring->mqd_obj);
3864 	return 0;
3865 }
3866 
3867 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3868 {
3869 	struct amdgpu_ring *ring = NULL;
3870 	int r = 0, i;
3871 
3872 	gfx_v9_0_cp_compute_enable(adev, true);
3873 
3874 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3875 		ring = &adev->gfx.compute_ring[i];
3876 
3877 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3878 		if (unlikely(r != 0))
3879 			goto done;
3880 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3881 		if (!r) {
3882 			r = gfx_v9_0_kcq_init_queue(ring, false);
3883 			amdgpu_bo_kunmap(ring->mqd_obj);
3884 			ring->mqd_ptr = NULL;
3885 		}
3886 		amdgpu_bo_unreserve(ring->mqd_obj);
3887 		if (r)
3888 			goto done;
3889 	}
3890 
3891 	r = amdgpu_gfx_enable_kcq(adev, 0);
3892 done:
3893 	return r;
3894 }
3895 
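/*
 * gfx_v9_0_cp_resume - bring up the command processors: load the CP
 * microcode on the legacy (non-PSP) path, resume the KIQ, the gfx ring and
 * the compute (KCQ) rings, then run the ring tests and re-enable the GUI
 * idle interrupt.
 */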
3896 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3897 {
3898 	int r, i;
3899 	struct amdgpu_ring *ring;
3900 
3901 	if (!(adev->flags & AMD_IS_APU))
3902 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3903 
3904 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3905 		if (adev->gfx.num_gfx_rings) {
3906 			/* legacy firmware loading */
3907 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3908 			if (r)
3909 				return r;
3910 		}
3911 
3912 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3913 		if (r)
3914 			return r;
3915 	}
3916 
3917 	r = gfx_v9_0_kiq_resume(adev);
3918 	if (r)
3919 		return r;
3920 
3921 	if (adev->gfx.num_gfx_rings) {
3922 		r = gfx_v9_0_cp_gfx_resume(adev);
3923 		if (r)
3924 			return r;
3925 	}
3926 
3927 	r = gfx_v9_0_kcq_resume(adev);
3928 	if (r)
3929 		return r;
3930 
3931 	if (adev->gfx.num_gfx_rings) {
3932 		ring = &adev->gfx.gfx_ring[0];
3933 		r = amdgpu_ring_test_helper(ring);
3934 		if (r)
3935 			return r;
3936 	}
3937 
3938 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3939 		ring = &adev->gfx.compute_ring[i];
3940 		amdgpu_ring_test_helper(ring);
3941 	}
3942 
3943 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3944 
3945 	return 0;
3946 }
3947 
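/*
 * gfx_v9_0_init_tcp_config - mirror the DF hash settings (64K/2M/1G) into
 * TCP_ADDR_CONFIG on GC 9.4.1 and 9.4.2.
 */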
3948 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3949 {
3950 	u32 tmp;
3951 
3952 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3953 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3954 		return;
3955 
3956 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3957 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3958 				adev->df.hash_status.hash_64k);
3959 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3960 				adev->df.hash_status.hash_2m);
3961 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3962 				adev->df.hash_status.hash_1g);
3963 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3964 }
3965 
3966 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3967 {
3968 	if (adev->gfx.num_gfx_rings)
3969 		gfx_v9_0_cp_gfx_enable(adev, enable);
3970 	gfx_v9_0_cp_compute_enable(adev, enable);
3971 }
3972 
3973 static int gfx_v9_0_hw_init(void *handle)
3974 {
3975 	int r;
3976 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3977 
3978 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
3979 				       adev->gfx.cleaner_shader_ptr);
3980 
3981 	if (!amdgpu_sriov_vf(adev))
3982 		gfx_v9_0_init_golden_registers(adev);
3983 
3984 	gfx_v9_0_constants_init(adev);
3985 
3986 	gfx_v9_0_init_tcp_config(adev);
3987 
3988 	r = adev->gfx.rlc.funcs->resume(adev);
3989 	if (r)
3990 		return r;
3991 
3992 	r = gfx_v9_0_cp_resume(adev);
3993 	if (r)
3994 		return r;
3995 
3996 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3997 		gfx_v9_4_2_set_power_brake_sequence(adev);
3998 
3999 	return r;
4000 }
4001 
4002 static int gfx_v9_0_hw_fini(void *handle)
4003 {
4004 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4005 
4006 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4007 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4008 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4009 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4010 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4011 
4012 	/* DF freeze and kcq disable will fail after a RAS fatal error */
4013 	if (!amdgpu_ras_intr_triggered())
4014 		/* disable KCQ to avoid CPC touching memory that is no longer valid */
4015 		amdgpu_gfx_disable_kcq(adev, 0);
4016 
4017 	if (amdgpu_sriov_vf(adev)) {
4018 		gfx_v9_0_cp_gfx_enable(adev, false);
4019 		/* must disable polling for SRIOV when hw is finished, otherwise the
4020 		 * CPC engine may keep fetching the WB address, which is no longer
4021 		 * valid after sw is finished, and trigger a DMAR read error on the
4022 		 * hypervisor side.
4023 		 */
4024 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4025 		return 0;
4026 	}
4027 
4028 	/* Use the deinitialize sequence from CAIL when unbinding the device
4029 	 * from the driver, otherwise the KIQ hangs when binding it back.
4030 	 */
4031 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4032 		mutex_lock(&adev->srbm_mutex);
4033 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4034 				adev->gfx.kiq[0].ring.pipe,
4035 				adev->gfx.kiq[0].ring.queue, 0, 0);
4036 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4037 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4038 		mutex_unlock(&adev->srbm_mutex);
4039 	}
4040 
4041 	gfx_v9_0_cp_enable(adev, false);
4042 
4043 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4044 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4045 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4046 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4047 		return 0;
4048 	}
4049 
4050 	adev->gfx.rlc.funcs->stop(adev);
4051 	return 0;
4052 }
4053 
4054 static int gfx_v9_0_suspend(void *handle)
4055 {
4056 	return gfx_v9_0_hw_fini(handle);
4057 }
4058 
4059 static int gfx_v9_0_resume(void *handle)
4060 {
4061 	return gfx_v9_0_hw_init(handle);
4062 }
4063 
4064 static bool gfx_v9_0_is_idle(void *handle)
4065 {
4066 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4067 
4068 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4069 				GRBM_STATUS, GUI_ACTIVE))
4070 		return false;
4071 	else
4072 		return true;
4073 }
4074 
4075 static int gfx_v9_0_wait_for_idle(void *handle)
4076 {
4077 	unsigned i;
4078 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4079 
4080 	for (i = 0; i < adev->usec_timeout; i++) {
4081 		if (gfx_v9_0_is_idle(handle))
4082 			return 0;
4083 		udelay(1);
4084 	}
4085 	return -ETIMEDOUT;
4086 }
4087 
4088 static int gfx_v9_0_soft_reset(void *handle)
4089 {
4090 	u32 grbm_soft_reset = 0;
4091 	u32 tmp;
4092 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4093 
4094 	/* GRBM_STATUS */
4095 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4096 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4097 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4098 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4099 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4100 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4101 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4102 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4103 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4104 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4105 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4106 	}
4107 
4108 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4109 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4110 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4111 	}
4112 
4113 	/* GRBM_STATUS2 */
4114 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4115 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4116 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4117 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4118 
4119 
4120 	if (grbm_soft_reset) {
4121 		/* stop the rlc */
4122 		adev->gfx.rlc.funcs->stop(adev);
4123 
4124 		if (adev->gfx.num_gfx_rings)
4125 			/* Disable GFX parsing/prefetching */
4126 			gfx_v9_0_cp_gfx_enable(adev, false);
4127 
4128 		/* Disable MEC parsing/prefetching */
4129 		gfx_v9_0_cp_compute_enable(adev, false);
4130 
4131 		if (grbm_soft_reset) {
4132 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4133 			tmp |= grbm_soft_reset;
4134 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4135 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4136 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4137 
4138 			udelay(50);
4139 
4140 			tmp &= ~grbm_soft_reset;
4141 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4142 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4143 		}
4144 
4145 		/* Wait a little for things to settle down */
4146 		udelay(50);
4147 	}
4148 	return 0;
4149 }
4150 
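/*
 * gfx_v9_0_kiq_read_clock - read the 64-bit GPU clock counter through a
 * KIQ COPY_DATA packet into a writeback slot; used by
 * gfx_v9_0_get_gpu_clock_counter() when running under SR-IOV runtime.
 */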
4151 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4152 {
4153 	signed long r, cnt = 0;
4154 	unsigned long flags;
4155 	uint32_t seq, reg_val_offs = 0;
4156 	uint64_t value = 0;
4157 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4158 	struct amdgpu_ring *ring = &kiq->ring;
4159 
4160 	BUG_ON(!ring->funcs->emit_rreg);
4161 
4162 	spin_lock_irqsave(&kiq->ring_lock, flags);
4163 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4164 		pr_err("critical bug! too many kiq readers\n");
4165 		goto failed_unlock;
4166 	}
4167 	amdgpu_ring_alloc(ring, 32);
4168 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4169 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4170 				(5 << 8) |	/* dst: memory */
4171 				(1 << 16) |	/* count sel */
4172 				(1 << 20));	/* write confirm */
4173 	amdgpu_ring_write(ring, 0);
4174 	amdgpu_ring_write(ring, 0);
4175 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4176 				reg_val_offs * 4));
4177 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4178 				reg_val_offs * 4));
4179 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4180 	if (r)
4181 		goto failed_undo;
4182 
4183 	amdgpu_ring_commit(ring);
4184 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4185 
4186 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4187 
4188 	/* Don't keep waiting in the GPU reset case, because doing so may
4189 	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
4190 	 * path is triggered from TTM and ttm_bo_lock_delayed_workqueue()
4191 	 * would never return if we kept waiting in virt_kiq_rreg, which
4192 	 * causes gpu_recover() to hang there.
4193 	 *
4194 	 * Also don't keep waiting when called from IRQ context.
4195 	 */
4196 	if (r < 1 && (amdgpu_in_reset(adev)))
4197 		goto failed_kiq_read;
4198 
4199 	might_sleep();
4200 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4201 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4202 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4203 	}
4204 
4205 	if (cnt > MAX_KIQ_REG_TRY)
4206 		goto failed_kiq_read;
4207 
4208 	mb();
4209 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4210 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4211 	amdgpu_device_wb_free(adev, reg_val_offs);
4212 	return value;
4213 
4214 failed_undo:
4215 	amdgpu_ring_undo(ring);
4216 failed_unlock:
4217 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4218 failed_kiq_read:
4219 	if (reg_val_offs)
4220 		amdgpu_device_wb_free(adev, reg_val_offs);
4221 	pr_err("failed to read gpu clock\n");
4222 	return ~0;
4223 }
4224 
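/*
 * Return the current GPU clock counter.  Renoir-class parts (GC 9.3.0) read
 * the SMUIO golden TSC with an upper/lower/upper sequence to catch a 32-bit
 * rollover; other parts latch the RLC clock counter (through the KIQ when
 * running under SR-IOV on GC 9.0.1) with GFXOFF temporarily disabled.
 */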
4225 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4226 {
4227 	uint64_t clock, clock_lo, clock_hi, hi_check;
4228 
4229 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4230 	case IP_VERSION(9, 3, 0):
4231 		preempt_disable();
4232 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4233 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4234 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4235 		/* The SMUIO TSC counter runs at 100 MHz, so its lower 32 bits carry over
4236 		 * roughly every 42 seconds (2^32 / 100,000,000 Hz ~= 42.9 s).
4237 		 */
4238 		if (hi_check != clock_hi) {
4239 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4240 			clock_hi = hi_check;
4241 		}
4242 		preempt_enable();
4243 		clock = clock_lo | (clock_hi << 32ULL);
4244 		break;
4245 	default:
4246 		amdgpu_gfx_off_ctrl(adev, false);
4247 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4248 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4249 			    IP_VERSION(9, 0, 1) &&
4250 		    amdgpu_sriov_runtime(adev)) {
4251 			clock = gfx_v9_0_kiq_read_clock(adev);
4252 		} else {
4253 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4254 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4255 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4256 		}
4257 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4258 		amdgpu_gfx_off_ctrl(adev, true);
4259 		break;
4260 	}
4261 	return clock;
4262 }
4263 
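/*
 * Program the per-VMID GDS aperture for this ring: the base and size of the
 * GDS memory window, the GWS base/size pair, and the OA (ordered append)
 * mask, all emitted through gfx_v9_0_write_data_to_reg() so the writes take
 * effect in command-stream order.
 */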
4264 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4265 					  uint32_t vmid,
4266 					  uint32_t gds_base, uint32_t gds_size,
4267 					  uint32_t gws_base, uint32_t gws_size,
4268 					  uint32_t oa_base, uint32_t oa_size)
4269 {
4270 	struct amdgpu_device *adev = ring->adev;
4271 
4272 	/* GDS Base */
4273 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4274 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4275 				   gds_base);
4276 
4277 	/* GDS Size */
4278 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4279 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4280 				   gds_size);
4281 
4282 	/* GWS */
4283 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4284 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4285 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4286 
4287 	/* OA */
4288 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4289 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4290 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4291 }
4292 
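/*
 * Raw GFX9 shader machine code used by the EDC/ECC GPR work-arounds below:
 * the VGPR variants write every vector register and the SGPR variant walks
 * the scalar registers, so each GPR bank gets written once and its ECC state
 * is initialized before normal dispatches run.
 */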
4293 static const u32 vgpr_init_compute_shader[] =
4294 {
4295 	0xb07c0000, 0xbe8000ff,
4296 	0x000000f8, 0xbf110800,
4297 	0x7e000280, 0x7e020280,
4298 	0x7e040280, 0x7e060280,
4299 	0x7e080280, 0x7e0a0280,
4300 	0x7e0c0280, 0x7e0e0280,
4301 	0x80808800, 0xbe803200,
4302 	0xbf84fff5, 0xbf9c0000,
4303 	0xd28c0001, 0x0001007f,
4304 	0xd28d0001, 0x0002027e,
4305 	0x10020288, 0xb8810904,
4306 	0xb7814000, 0xd1196a01,
4307 	0x00000301, 0xbe800087,
4308 	0xbefc00c1, 0xd89c4000,
4309 	0x00020201, 0xd89cc080,
4310 	0x00040401, 0x320202ff,
4311 	0x00000800, 0x80808100,
4312 	0xbf84fff8, 0x7e020280,
4313 	0xbf810000, 0x00000000,
4314 };
4315 
4316 static const u32 sgpr_init_compute_shader[] =
4317 {
4318 	0xb07c0000, 0xbe8000ff,
4319 	0x0000005f, 0xbee50080,
4320 	0xbe812c65, 0xbe822c65,
4321 	0xbe832c65, 0xbe842c65,
4322 	0xbe852c65, 0xb77c0005,
4323 	0x80808500, 0xbf84fff8,
4324 	0xbe800080, 0xbf810000,
4325 };
4326 
4327 static const u32 vgpr_init_compute_shader_arcturus[] = {
4328 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4329 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4330 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4331 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4332 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4333 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4334 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4335 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4336 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4337 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4338 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4339 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4340 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4341 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4342 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4343 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4344 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4345 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4346 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4347 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4348 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4349 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4350 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4351 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4352 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4353 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4354 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4355 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4356 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4357 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4358 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4359 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4360 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4361 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4362 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4363 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4364 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4365 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4366 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4367 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4368 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4369 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4370 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4371 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4372 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4373 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4374 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4375 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4376 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4377 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4378 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4379 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4380 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4381 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4382 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4383 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4384 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4385 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4386 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4387 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4388 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4389 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4390 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4391 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4392 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4393 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4394 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4395 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4396 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4397 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4398 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4399 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4400 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4401 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4402 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4403 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4404 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4405 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4406 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4407 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4408 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4409 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4410 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4411 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4412 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4413 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4414 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4415 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4416 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4417 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4418 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4419 	0xbf84fff8, 0xbf810000,
4420 };
4421 
4422 /* When the register arrays below are changed, please update gpr_reg_size
4423    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds so that
4424    all gfx9 ASICs remain covered. */
4425 static const struct soc15_reg_entry vgpr_init_regs[] = {
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4431    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4432    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4433    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4436    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4437    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4438    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4440 };
4441 
4442 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4453    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4454    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4455    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4457 };
4458 
4459 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4474 };
4475 
4476 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4491 };
4492 
4493 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4494    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4499    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4500    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4501    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4502    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4503    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4504    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4505    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4506    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4507    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4508    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4510    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4511    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4512    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4513    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4514    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4515    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4516    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4517    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4518    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4519    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4520    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4521    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4522    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4523    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4524    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4525    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4526    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4527 };
4528 
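/*
 * GDS ECC work-around: map the whole GDS aperture to VMID0, issue a DMA_DATA
 * packet that fills it, then busy-wait until the CP has consumed the packet
 * before unmapping.  Only runs when GFX RAS is supported.
 */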
4529 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4530 {
4531 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4532 	int i, r;
4533 
4534 	/* only supported when RAS is enabled */
4535 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4536 		return 0;
4537 
4538 	r = amdgpu_ring_alloc(ring, 7);
4539 	if (r) {
4540 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4541 			ring->name, r);
4542 		return r;
4543 	}
4544 
4545 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4546 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4547 
4548 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4549 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4550 				PACKET3_DMA_DATA_DST_SEL(1) |
4551 				PACKET3_DMA_DATA_SRC_SEL(2) |
4552 				PACKET3_DMA_DATA_ENGINE(0)));
4553 	amdgpu_ring_write(ring, 0);
4554 	amdgpu_ring_write(ring, 0);
4555 	amdgpu_ring_write(ring, 0);
4556 	amdgpu_ring_write(ring, 0);
4557 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4558 				adev->gds.gds_size);
4559 
4560 	amdgpu_ring_commit(ring);
4561 
4562 	for (i = 0; i < adev->usec_timeout; i++) {
4563 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4564 			break;
4565 		udelay(1);
4566 	}
4567 
4568 	if (i >= adev->usec_timeout)
4569 		r = -ETIMEDOUT;
4570 
4571 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4572 
4573 	return r;
4574 }
4575 
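/*
 * GPR ECC work-around: build one indirect buffer containing three compute
 * dispatches - the VGPR-init shader once, then the SGPR-init shader twice
 * over complementary CU masks - so every vector and scalar register file is
 * written.  The IB is submitted directly and fenced; it is only used when
 * GFX RAS is supported and the first compute ring is ready.
 */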
4576 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4577 {
4578 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4579 	struct amdgpu_ib ib;
4580 	struct dma_fence *f = NULL;
4581 	int r, i;
4582 	unsigned total_size, vgpr_offset, sgpr_offset;
4583 	u64 gpu_addr;
4584 
4585 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4586 						adev->gfx.config.max_cu_per_sh *
4587 						adev->gfx.config.max_sh_per_se;
4588 	int sgpr_work_group_size = 5;
4589 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4590 	int vgpr_init_shader_size;
4591 	const u32 *vgpr_init_shader_ptr;
4592 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4593 
4594 	/* only supported when RAS is enabled */
4595 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4596 		return 0;
4597 
4598 	/* bail if the compute ring is not ready */
4599 	if (!ring->sched.ready)
4600 		return 0;
4601 
4602 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4603 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4604 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4605 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4606 	} else {
4607 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4608 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4609 		vgpr_init_regs_ptr = vgpr_init_regs;
4610 	}
4611 
4612 	total_size =
4613 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4614 	total_size +=
4615 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4616 	total_size +=
4617 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4618 	total_size = ALIGN(total_size, 256);
4619 	vgpr_offset = total_size;
4620 	total_size += ALIGN(vgpr_init_shader_size, 256);
4621 	sgpr_offset = total_size;
4622 	total_size += sizeof(sgpr_init_compute_shader);
4623 
4624 	/* allocate an indirect buffer to put the commands in */
4625 	memset(&ib, 0, sizeof(ib));
4626 	r = amdgpu_ib_get(adev, NULL, total_size,
4627 					AMDGPU_IB_POOL_DIRECT, &ib);
4628 	if (r) {
4629 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4630 		return r;
4631 	}
4632 
4633 	/* load the compute shaders */
4634 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4635 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4636 
4637 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4638 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4639 
4640 	/* init the ib length to 0 */
4641 	ib.length_dw = 0;
4642 
4643 	/* VGPR */
4644 	/* write the register state for the compute dispatch */
4645 	for (i = 0; i < gpr_reg_size; i++) {
4646 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4647 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4648 								- PACKET3_SET_SH_REG_START;
4649 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4650 	}
4651 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4652 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4653 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4654 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4655 							- PACKET3_SET_SH_REG_START;
4656 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4657 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4658 
4659 	/* write dispatch packet */
4660 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4661 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4662 	ib.ptr[ib.length_dw++] = 1; /* y */
4663 	ib.ptr[ib.length_dw++] = 1; /* z */
4664 	ib.ptr[ib.length_dw++] =
4665 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4666 
4667 	/* write CS partial flush packet */
4668 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4669 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4670 
4671 	/* SGPR1 */
4672 	/* write the register state for the compute dispatch */
4673 	for (i = 0; i < gpr_reg_size; i++) {
4674 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4675 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4676 								- PACKET3_SET_SH_REG_START;
4677 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4678 	}
4679 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4680 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4681 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4682 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4683 							- PACKET3_SET_SH_REG_START;
4684 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4685 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4686 
4687 	/* write dispatch packet */
4688 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4689 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4690 	ib.ptr[ib.length_dw++] = 1; /* y */
4691 	ib.ptr[ib.length_dw++] = 1; /* z */
4692 	ib.ptr[ib.length_dw++] =
4693 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4694 
4695 	/* write CS partial flush packet */
4696 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4697 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4698 
4699 	/* SGPR2 */
4700 	/* write the register state for the compute dispatch */
4701 	for (i = 0; i < gpr_reg_size; i++) {
4702 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4703 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4704 								- PACKET3_SET_SH_REG_START;
4705 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4706 	}
4707 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4708 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4709 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4710 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4711 							- PACKET3_SET_SH_REG_START;
4712 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4713 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4714 
4715 	/* write dispatch packet */
4716 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4717 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4718 	ib.ptr[ib.length_dw++] = 1; /* y */
4719 	ib.ptr[ib.length_dw++] = 1; /* z */
4720 	ib.ptr[ib.length_dw++] =
4721 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4722 
4723 	/* write CS partial flush packet */
4724 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4725 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4726 
4727 	/* schedule the ib on the ring */
4728 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4729 	if (r) {
4730 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4731 		goto fail;
4732 	}
4733 
4734 	/* wait for the GPU to finish processing the IB */
4735 	r = dma_fence_wait(f, false);
4736 	if (r) {
4737 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4738 		goto fail;
4739 	}
4740 
4741 fail:
4742 	amdgpu_ib_free(adev, &ib, NULL);
4743 	dma_fence_put(f);
4744 
4745 	return r;
4746 }
4747 
4748 static int gfx_v9_0_early_init(void *handle)
4749 {
4750 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4751 
4752 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4753 
4754 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4755 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4756 		adev->gfx.num_gfx_rings = 0;
4757 	else
4758 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4759 	adev->gfx.xcc_mask = 1;
4760 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4761 					  AMDGPU_MAX_COMPUTE_RINGS);
4762 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4763 	gfx_v9_0_set_ring_funcs(adev);
4764 	gfx_v9_0_set_irq_funcs(adev);
4765 	gfx_v9_0_set_gds_init(adev);
4766 	gfx_v9_0_set_rlc_funcs(adev);
4767 
4768 	/* init rlcg reg access ctrl */
4769 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4770 
4771 	return gfx_v9_0_init_microcode(adev);
4772 }
4773 
4774 static int gfx_v9_0_ecc_late_init(void *handle)
4775 {
4776 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4777 	int r;
4778 
4779 	/*
4780 	 * Temporary workaround for an issue where CP firmware fails to
4781 	 * update the read pointer while CPDMA writes the GDS clearing
4782 	 * operation during the suspend/resume sequence on several cards.
4783 	 * So limit this operation to the cold boot sequence only.
4784 	 */
4785 	if ((!adev->in_suspend) &&
4786 	    (adev->gds.gds_size)) {
4787 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4788 		if (r)
4789 			return r;
4790 	}
4791 
4792 	/* requires IBs so do in late init after IB pool is initialized */
4793 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4794 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4795 	else
4796 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4797 
4798 	if (r)
4799 		return r;
4800 
4801 	if (adev->gfx.ras &&
4802 	    adev->gfx.ras->enable_watchdog_timer)
4803 		adev->gfx.ras->enable_watchdog_timer(adev);
4804 
4805 	return 0;
4806 }
4807 
4808 static int gfx_v9_0_late_init(void *handle)
4809 {
4810 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4811 	int r;
4812 
4813 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4814 	if (r)
4815 		return r;
4816 
4817 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4818 	if (r)
4819 		return r;
4820 
4821 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4822 	if (r)
4823 		return r;
4824 
4825 	r = gfx_v9_0_ecc_late_init(handle);
4826 	if (r)
4827 		return r;
4828 
4829 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4830 		gfx_v9_4_2_debug_trap_config_init(adev,
4831 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4832 	else
4833 		gfx_v9_0_debug_trap_config_init(adev,
4834 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4835 
4836 	return 0;
4837 }
4838 
4839 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4840 {
4841 	uint32_t rlc_setting;
4842 
4843 	/* if RLC is not enabled, do nothing */
4844 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4845 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4846 		return false;
4847 
4848 	return true;
4849 }
4850 
4851 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4852 {
4853 	uint32_t data;
4854 	unsigned i;
4855 
4856 	data = RLC_SAFE_MODE__CMD_MASK;
4857 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4858 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4859 
4860 	/* wait for RLC_SAFE_MODE */
4861 	for (i = 0; i < adev->usec_timeout; i++) {
4862 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4863 			break;
4864 		udelay(1);
4865 	}
4866 }
4867 
4868 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4869 {
4870 	uint32_t data;
4871 
4872 	data = RLC_SAFE_MODE__CMD_MASK;
4873 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4874 }
4875 
4876 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4877 						bool enable)
4878 {
4879 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4880 
4881 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4882 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4883 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4884 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4885 	} else {
4886 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4887 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4888 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4889 	}
4890 
4891 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4892 }
4893 
4894 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4895 						bool enable)
4896 {
4897 	/* TODO: double check if we need to perform under safe mode */
4898 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4899 
4900 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4901 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4902 	else
4903 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4904 
4905 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4906 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4907 	else
4908 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4909 
4910 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4911 }
4912 
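/*
 * Toggle medium-grain clock gating (MGCG) and the RLC/CP memory light-sleep
 * (MGLS) features by reprogramming RLC_CGTT_MGCG_OVERRIDE and the
 * RLC/CP_MEM_SLP_CNTL registers.  The whole sequence runs under RLC safe mode.
 */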
4913 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4914 						      bool enable)
4915 {
4916 	uint32_t data, def;
4917 
4918 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4919 
4920 	/* It is disabled by HW by default */
4921 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4922 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4923 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4924 
4925 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4926 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4927 
4928 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4929 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4930 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4931 
4932 		/* only for Vega10 & Raven1 */
4933 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4934 
4935 		if (def != data)
4936 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4937 
4938 		/* MGLS is a global flag to control all MGLS in GFX */
4939 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4940 			/* 2 - RLC memory Light sleep */
4941 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4942 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4943 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4944 				if (def != data)
4945 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4946 			}
4947 			/* 3 - CP memory Light sleep */
4948 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4949 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4950 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4951 				if (def != data)
4952 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4953 			}
4954 		}
4955 	} else {
4956 		/* 1 - MGCG_OVERRIDE */
4957 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4958 
4959 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4960 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4961 
4962 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4963 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4964 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4965 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4966 
4967 		if (def != data)
4968 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4969 
4970 		/* 2 - disable MGLS in RLC */
4971 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4972 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4973 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4974 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4975 		}
4976 
4977 		/* 3 - disable MGLS in CP */
4978 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4979 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4980 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4981 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4982 		}
4983 	}
4984 
4985 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4986 }
4987 
4988 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4989 					   bool enable)
4990 {
4991 	uint32_t data, def;
4992 
4993 	if (!adev->gfx.num_gfx_rings)
4994 		return;
4995 
4996 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4997 
4998 	/* Enable 3D CGCG/CGLS */
4999 	if (enable) {
5000 		/* write cmd to clear cgcg/cgls ov */
5001 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5002 		/* unset CGCG override */
5003 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5004 		/* update CGCG and CGLS override bits */
5005 		if (def != data)
5006 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5007 
5008 		/* enable 3Dcgcg FSM(0x0000363f) */
5009 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5010 
5011 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5012 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5013 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5014 		else
5015 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5016 
5017 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5018 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5019 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5020 		if (def != data)
5021 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5022 
5023 		/* set IDLE_POLL_COUNT(0x00900100) */
5024 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5025 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5026 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5027 		if (def != data)
5028 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5029 	} else {
5030 		/* Disable CGCG/CGLS */
5031 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5032 		/* disable cgcg, cgls should be disabled */
5033 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5034 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5035 		/* disable cgcg and cgls in FSM */
5036 		if (def != data)
5037 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5038 	}
5039 
5040 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5041 }
5042 
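/*
 * Toggle coarse-grain clock gating (CGCG/CGLS): clear or set the override
 * bits in RLC_CGTT_MGCG_OVERRIDE, program the CGCG FSM thresholds in
 * RLC_CGCG_CGLS_CTRL, and set the CP_RB_WPTR_POLL idle/poll counts, all
 * under RLC safe mode.
 */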
5043 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5044 						      bool enable)
5045 {
5046 	uint32_t def, data;
5047 
5048 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5049 
5050 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5051 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5052 		/* unset CGCG override */
5053 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5054 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5055 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5056 		else
5057 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5058 		/* update CGCG and CGLS override bits */
5059 		if (def != data)
5060 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5061 
5062 		/* enable cgcg FSM(0x0000363F) */
5063 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5064 
5065 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5066 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5067 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5068 		else
5069 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5070 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5071 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5072 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5073 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5074 		if (def != data)
5075 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5076 
5077 		/* set IDLE_POLL_COUNT(0x00900100) */
5078 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5079 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5080 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5081 		if (def != data)
5082 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5083 	} else {
5084 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5085 		/* reset CGCG/CGLS bits */
5086 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5087 		/* disable cgcg and cgls in FSM */
5088 		if (def != data)
5089 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5090 	}
5091 
5092 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5093 }
5094 
5095 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5096 					    bool enable)
5097 {
5098 	if (enable) {
5099 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5100 		 * ===  MGCG + MGLS ===
5101 		 */
5102 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5103 		/* ===  CGCG /CGLS for GFX 3D Only === */
5104 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5105 		/* ===  CGCG + CGLS === */
5106 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5107 	} else {
5108 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5109 		 * ===  CGCG + CGLS ===
5110 		 */
5111 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5112 		/* ===  CGCG /CGLS for GFX 3D Only === */
5113 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5114 		/* ===  MGCG + MGLS === */
5115 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5116 	}
5117 	return 0;
5118 }
5119 
5120 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5121 					      unsigned int vmid)
5122 {
5123 	u32 reg, data;
5124 
5125 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5126 	if (amdgpu_sriov_is_pp_one_vf(adev))
5127 		data = RREG32_NO_KIQ(reg);
5128 	else
5129 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5130 
5131 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5132 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5133 
5134 	if (amdgpu_sriov_is_pp_one_vf(adev))
5135 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5136 	else
5137 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5138 }
5139 
5140 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5141 {
5142 	amdgpu_gfx_off_ctrl(adev, false);
5143 
5144 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5145 
5146 	amdgpu_gfx_off_ctrl(adev, true);
5147 }
5148 
5149 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5150 					uint32_t offset,
5151 					struct soc15_reg_rlcg *entries, int arr_size)
5152 {
5153 	int i;
5154 	uint32_t reg;
5155 
5156 	if (!entries)
5157 		return false;
5158 
5159 	for (i = 0; i < arr_size; i++) {
5160 		const struct soc15_reg_rlcg *entry;
5161 
5162 		entry = &entries[i];
5163 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5164 		if (offset == reg)
5165 			return true;
5166 	}
5167 
5168 	return false;
5169 }
5170 
5171 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5172 {
5173 	return gfx_v9_0_check_rlcg_range(adev, offset,
5174 					(void *)rlcg_access_gc_9_0,
5175 					ARRAY_SIZE(rlcg_access_gc_9_0));
5176 }
5177 
5178 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5179 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5180 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5181 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5182 	.init = gfx_v9_0_rlc_init,
5183 	.get_csb_size = gfx_v9_0_get_csb_size,
5184 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5185 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5186 	.resume = gfx_v9_0_rlc_resume,
5187 	.stop = gfx_v9_0_rlc_stop,
5188 	.reset = gfx_v9_0_rlc_reset,
5189 	.start = gfx_v9_0_rlc_start,
5190 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5191 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5192 };
5193 
5194 static int gfx_v9_0_set_powergating_state(void *handle,
5195 					  enum amd_powergating_state state)
5196 {
5197 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5198 	bool enable = (state == AMD_PG_STATE_GATE);
5199 
5200 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5201 	case IP_VERSION(9, 2, 2):
5202 	case IP_VERSION(9, 1, 0):
5203 	case IP_VERSION(9, 3, 0):
5204 		if (!enable)
5205 			amdgpu_gfx_off_ctrl(adev, false);
5206 
5207 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5208 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5209 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5210 		} else {
5211 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5212 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5213 		}
5214 
5215 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5216 			gfx_v9_0_enable_cp_power_gating(adev, true);
5217 		else
5218 			gfx_v9_0_enable_cp_power_gating(adev, false);
5219 
5220 		/* update gfx cgpg state */
5221 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5222 
5223 		/* update mgcg state */
5224 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5225 
5226 		if (enable)
5227 			amdgpu_gfx_off_ctrl(adev, true);
5228 		break;
5229 	case IP_VERSION(9, 2, 1):
5230 		amdgpu_gfx_off_ctrl(adev, enable);
5231 		break;
5232 	default:
5233 		break;
5234 	}
5235 
5236 	return 0;
5237 }
5238 
5239 static int gfx_v9_0_set_clockgating_state(void *handle,
5240 					  enum amd_clockgating_state state)
5241 {
5242 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5243 
5244 	if (amdgpu_sriov_vf(adev))
5245 		return 0;
5246 
5247 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5248 	case IP_VERSION(9, 0, 1):
5249 	case IP_VERSION(9, 2, 1):
5250 	case IP_VERSION(9, 4, 0):
5251 	case IP_VERSION(9, 2, 2):
5252 	case IP_VERSION(9, 1, 0):
5253 	case IP_VERSION(9, 4, 1):
5254 	case IP_VERSION(9, 3, 0):
5255 	case IP_VERSION(9, 4, 2):
5256 		gfx_v9_0_update_gfx_clock_gating(adev,
5257 						 state == AMD_CG_STATE_GATE);
5258 		break;
5259 	default:
5260 		break;
5261 	}
5262 	return 0;
5263 }
5264 
5265 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5266 {
5267 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5268 	int data;
5269 
5270 	if (amdgpu_sriov_vf(adev))
5271 		*flags = 0;
5272 
5273 	/* AMD_CG_SUPPORT_GFX_MGCG */
5274 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5275 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5276 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5277 
5278 	/* AMD_CG_SUPPORT_GFX_CGCG */
5279 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5280 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5281 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5282 
5283 	/* AMD_CG_SUPPORT_GFX_CGLS */
5284 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5285 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5286 
5287 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5288 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5289 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5290 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5291 
5292 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5293 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5294 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5295 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5296 
5297 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5298 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5299 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5300 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5301 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5302 
5303 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5304 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5305 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5306 	}
5307 }
5308 
5309 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5310 {
5311 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5312 }
5313 
5314 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5315 {
5316 	struct amdgpu_device *adev = ring->adev;
5317 	u64 wptr;
5318 
5319 	/* XXX check if swapping is necessary on BE */
5320 	if (ring->use_doorbell) {
5321 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5322 	} else {
5323 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5324 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5325 	}
5326 
5327 	return wptr;
5328 }
5329 
5330 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5331 {
5332 	struct amdgpu_device *adev = ring->adev;
5333 
5334 	if (ring->use_doorbell) {
5335 		/* XXX check if swapping is necessary on BE */
5336 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5337 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5338 	} else {
5339 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5340 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5341 	}
5342 }
5343 
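/*
 * Emit an HDP flush: select the per-ME/pipe reference mask from the NBIO HDP
 * flush registers and wait until the "done" register reflects the requested
 * flush.  GFX rings wait on the PFP engine, compute rings on the ME engine.
 */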
5344 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5345 {
5346 	struct amdgpu_device *adev = ring->adev;
5347 	u32 ref_and_mask, reg_mem_engine;
5348 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5349 
5350 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5351 		switch (ring->me) {
5352 		case 1:
5353 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5354 			break;
5355 		case 2:
5356 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5357 			break;
5358 		default:
5359 			return;
5360 		}
5361 		reg_mem_engine = 0;
5362 	} else {
5363 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5364 		reg_mem_engine = 1; /* pfp */
5365 	}
5366 
5367 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5368 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5369 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5370 			      ref_and_mask, ref_and_mask, 0x20);
5371 }
5372 
5373 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5374 					struct amdgpu_job *job,
5375 					struct amdgpu_ib *ib,
5376 					uint32_t flags)
5377 {
5378 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5379 	u32 header, control = 0;
5380 
5381 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5382 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5383 	else
5384 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5385 
5386 	control |= ib->length_dw | (vmid << 24);
5387 
5388 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5389 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5390 
5391 		if (flags & AMDGPU_IB_PREEMPTED)
5392 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5393 
5394 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5395 			gfx_v9_0_ring_emit_de_meta(ring,
5396 						   (!amdgpu_sriov_vf(ring->adev) &&
5397 						   flags & AMDGPU_IB_PREEMPTED) ?
5398 						   true : false,
5399 						   job->gds_size > 0 && job->gds_base != 0);
5400 	}
5401 
5402 	amdgpu_ring_write(ring, header);
5403 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5404 	amdgpu_ring_write(ring,
5405 #ifdef __BIG_ENDIAN
5406 		(2 << 0) |
5407 #endif
5408 		lower_32_bits(ib->gpu_addr));
5409 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5410 	amdgpu_ring_ib_on_emit_cntl(ring);
5411 	amdgpu_ring_write(ring, control);
5412 }
5413 
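/*
 * The *_patch_* helpers below fix up the ring image when an IB is resumed
 * after preemption: the saved IB control dword gets the PRE_RESUME flag, and
 * the CE/DE metadata payloads are copied back into the ring, splitting the
 * memcpy when the copy wraps past the ring buffer mask.
 */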
5414 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5415 				     unsigned offset)
5416 {
5417 	u32 control = ring->ring[offset];
5418 
5419 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5420 	ring->ring[offset] = control;
5421 }
5422 
5423 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5424 					unsigned offset)
5425 {
5426 	struct amdgpu_device *adev = ring->adev;
5427 	void *ce_payload_cpu_addr;
5428 	uint64_t payload_offset, payload_size;
5429 
5430 	payload_size = sizeof(struct v9_ce_ib_state);
5431 
5432 	if (ring->is_mes_queue) {
5433 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5434 					  gfx[0].gfx_meta_data) +
5435 			offsetof(struct v9_gfx_meta_data, ce_payload);
5436 		ce_payload_cpu_addr =
5437 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5438 	} else {
5439 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5440 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5441 	}
5442 
5443 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5444 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5445 	} else {
5446 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5447 		       (ring->buf_mask + 1 - offset) << 2);
5448 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5449 		memcpy((void *)&ring->ring[0],
5450 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5451 		       payload_size);
5452 	}
5453 }
5454 
5455 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5456 					unsigned offset)
5457 {
5458 	struct amdgpu_device *adev = ring->adev;
5459 	void *de_payload_cpu_addr;
5460 	uint64_t payload_offset, payload_size;
5461 
5462 	payload_size = sizeof(struct v9_de_ib_state);
5463 
5464 	if (ring->is_mes_queue) {
5465 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5466 					  gfx[0].gfx_meta_data) +
5467 			offsetof(struct v9_gfx_meta_data, de_payload);
5468 		de_payload_cpu_addr =
5469 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5470 	} else {
5471 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5472 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5473 	}
5474 
5475 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5476 		IB_COMPLETION_STATUS_PREEMPTED;
5477 
5478 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5479 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5480 	} else {
5481 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5482 		       (ring->buf_mask + 1 - offset) << 2);
5483 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5484 		memcpy((void *)&ring->ring[0],
5485 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5486 		       payload_size);
5487 	}
5488 }
5489 
5490 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5491 					  struct amdgpu_job *job,
5492 					  struct amdgpu_ib *ib,
5493 					  uint32_t flags)
5494 {
5495 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5496 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5497 
5498 	/* Currently, there is a high possibility to get wave ID mismatch
5499 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5500 	 * different wave IDs than the GDS expects. This situation happens
5501 	 * randomly when at least 5 compute pipes use GDS ordered append.
5502 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5503 	 * Those are probably bugs somewhere else in the kernel driver.
5504 	 *
5505 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5506 	 * GDS to 0 for this ring (me/pipe).
5507 	 */
5508 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5509 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5510 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5511 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5512 	}
5513 
5514 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5515 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5516 	amdgpu_ring_write(ring,
5517 #ifdef __BIG_ENDIAN
5518 				(2 << 0) |
5519 #endif
5520 				lower_32_bits(ib->gpu_addr));
5521 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5522 	amdgpu_ring_write(ring, control);
5523 }
5524 
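/* Emit a RELEASE_MEM packet: flush/invalidate caches at end of pipe and
 * write the fence @seq (32 or 64 bit, depending on @flags) to @addr,
 * optionally raising an interrupt once the write has completed.
 */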
5525 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5526 				     u64 seq, unsigned flags)
5527 {
5528 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5529 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5530 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5531 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5532 	uint32_t dw2 = 0;
5533 
5534 	/* RELEASE_MEM - flush caches, send int */
5535 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5536 
5537 	if (writeback) {
5538 		dw2 = EOP_TC_NC_ACTION_EN;
5539 	} else {
5540 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5541 				EOP_TC_MD_ACTION_EN;
5542 	}
5543 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5544 				EVENT_INDEX(5);
5545 	if (exec)
5546 		dw2 |= EOP_EXEC;
5547 
5548 	amdgpu_ring_write(ring, dw2);
5549 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5550 
5551 	/*
5552 	 * the address must be Qword aligned for a 64bit write and Dword
5553 	 * aligned when only the low 32 bits are sent (data high discarded)
5554 	 */
5555 	if (write64bit)
5556 		BUG_ON(addr & 0x7);
5557 	else
5558 		BUG_ON(addr & 0x3);
5559 	amdgpu_ring_write(ring, lower_32_bits(addr));
5560 	amdgpu_ring_write(ring, upper_32_bits(addr));
5561 	amdgpu_ring_write(ring, lower_32_bits(seq));
5562 	amdgpu_ring_write(ring, upper_32_bits(seq));
5563 	amdgpu_ring_write(ring, 0);
5564 }
5565 
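/* Emit a WAIT_REG_MEM packet that stalls the ring (on the PFP for gfx,
 * the ME for compute) until the fence write-back location reaches the
 * last synced sequence number.
 */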
5566 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5567 {
5568 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5569 	uint32_t seq = ring->fence_drv.sync_seq;
5570 	uint64_t addr = ring->fence_drv.gpu_addr;
5571 
5572 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5573 			      lower_32_bits(addr), upper_32_bits(addr),
5574 			      seq, 0xffffffff, 4);
5575 }
5576 
5577 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5578 					unsigned vmid, uint64_t pd_addr)
5579 {
5580 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5581 
5582 	/* compute doesn't have PFP */
5583 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5584 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5585 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5586 		amdgpu_ring_write(ring, 0x0);
5587 	}
5588 }
5589 
5590 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5591 {
5592 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5593 }
5594 
5595 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5596 {
5597 	u64 wptr;
5598 
5599 	/* XXX check if swapping is necessary on BE */
5600 	if (ring->use_doorbell)
5601 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5602 	else
5603 		BUG();
5604 	return wptr;
5605 }
5606 
5607 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5608 {
5609 	struct amdgpu_device *adev = ring->adev;
5610 
5611 	/* XXX check if swapping is necessary on BE */
5612 	if (ring->use_doorbell) {
5613 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5614 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5615 	} else {
5616 		BUG(); /* only DOORBELL method supported on gfx9 now */
5617 	}
5618 }
5619 
5620 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5621 					 u64 seq, unsigned int flags)
5622 {
5623 	struct amdgpu_device *adev = ring->adev;
5624 
5625 	/* we only allocate 32bit for each seq wb address */
5626 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5627 
5628 	/* write fence seq to the "addr" */
5629 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5630 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5631 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5632 	amdgpu_ring_write(ring, lower_32_bits(addr));
5633 	amdgpu_ring_write(ring, upper_32_bits(addr));
5634 	amdgpu_ring_write(ring, lower_32_bits(seq));
5635 
5636 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5637 		/* set register to trigger INT */
5638 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5639 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5640 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5641 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5642 		amdgpu_ring_write(ring, 0);
5643 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5644 	}
5645 }
5646 
5647 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5648 {
5649 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5650 	amdgpu_ring_write(ring, 0);
5651 }
5652 
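/* Emit a WRITE_DATA packet carrying the CE meta-data payload to its slot
 * in the CSA (or the MES context buffer). On @resume the previously saved
 * payload is replayed instead of a zero-initialized one.
 */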
5653 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5654 {
5655 	struct amdgpu_device *adev = ring->adev;
5656 	struct v9_ce_ib_state ce_payload = {0};
5657 	uint64_t offset, ce_payload_gpu_addr;
5658 	void *ce_payload_cpu_addr;
5659 	int cnt;
5660 
5661 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5662 
5663 	if (ring->is_mes_queue) {
5664 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5665 				  gfx[0].gfx_meta_data) +
5666 			offsetof(struct v9_gfx_meta_data, ce_payload);
5667 		ce_payload_gpu_addr =
5668 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5669 		ce_payload_cpu_addr =
5670 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5671 	} else {
5672 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5673 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5674 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5675 	}
5676 
5677 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5678 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5679 				 WRITE_DATA_DST_SEL(8) |
5680 				 WR_CONFIRM) |
5681 				 WRITE_DATA_CACHE_POLICY(0));
5682 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5683 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5684 
5685 	amdgpu_ring_ib_on_emit_ce(ring);
5686 
5687 	if (resume)
5688 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5689 					   sizeof(ce_payload) >> 2);
5690 	else
5691 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5692 					   sizeof(ce_payload) >> 2);
5693 }
5694 
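/* Preempt the IBs currently executing on @ring: assert the preemption
 * cond_exec, emit a trailing fence with the EXEC flag, ask the KIQ to
 * issue PREEMPT_QUEUES_NO_UNMAP, then poll (up to usec_timeout) for the
 * trailing fence before clearing CP_VMID_PREEMPT and deasserting the
 * preemption condition.
 */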
5695 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5696 {
5697 	int i, r = 0;
5698 	struct amdgpu_device *adev = ring->adev;
5699 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5700 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5701 	unsigned long flags;
5702 
5703 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5704 		return -EINVAL;
5705 
5706 	spin_lock_irqsave(&kiq->ring_lock, flags);
5707 
5708 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5709 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5710 		return -ENOMEM;
5711 	}
5712 
5713 	/* assert preemption condition */
5714 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5715 
5716 	ring->trail_seq += 1;
5717 	amdgpu_ring_alloc(ring, 13);
5718 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5719 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5720 
5721 	/* assert IB preemption, emit the trailing fence */
5722 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5723 				   ring->trail_fence_gpu_addr,
5724 				   ring->trail_seq);
5725 
5726 	amdgpu_ring_commit(kiq_ring);
5727 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5728 
5729 	/* poll the trailing fence */
5730 	for (i = 0; i < adev->usec_timeout; i++) {
5731 		if (ring->trail_seq ==
5732 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5733 			break;
5734 		udelay(1);
5735 	}
5736 
5737 	if (i >= adev->usec_timeout) {
5738 		r = -EINVAL;
5739 		DRM_WARN("ring %d timed out preempting ib\n", ring->idx);
5740 	}
5741 
5742 	/* reset the CP_VMID_PREEMPT after trailing fence */
5743 	amdgpu_ring_emit_wreg(ring,
5744 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5745 			      0x0);
5746 	amdgpu_ring_commit(ring);
5747 
5748 	/* deassert preemption condition */
5749 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5750 	return r;
5751 }
5752 
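/* Emit a WRITE_DATA packet carrying the DE meta-data payload to its slot
 * in the CSA (or the MES context buffer), recording the GDS backup address
 * when @usegds. On @resume the previously saved payload is replayed.
 */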
5753 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5754 {
5755 	struct amdgpu_device *adev = ring->adev;
5756 	struct v9_de_ib_state de_payload = {0};
5757 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5758 	void *de_payload_cpu_addr;
5759 	int cnt;
5760 
5761 	if (ring->is_mes_queue) {
5762 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5763 				  gfx[0].gfx_meta_data) +
5764 			offsetof(struct v9_gfx_meta_data, de_payload);
5765 		de_payload_gpu_addr =
5766 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5767 		de_payload_cpu_addr =
5768 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5769 
5770 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5771 				  gfx[0].gds_backup) +
5772 			offsetof(struct v9_gfx_meta_data, de_payload);
5773 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5774 	} else {
5775 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5776 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5777 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5778 
5779 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5780 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5781 				 PAGE_SIZE);
5782 	}
5783 
5784 	if (usegds) {
5785 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5786 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5787 	}
5788 
5789 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5790 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5791 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5792 				 WRITE_DATA_DST_SEL(8) |
5793 				 WR_CONFIRM) |
5794 				 WRITE_DATA_CACHE_POLICY(0));
5795 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5796 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5797 
5798 	amdgpu_ring_ib_on_emit_de(ring);
5799 	if (resume)
5800 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5801 					   sizeof(de_payload) >> 2);
5802 	else
5803 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5804 					   sizeof(de_payload) >> 2);
5805 }
5806 
5807 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5808 				   bool secure)
5809 {
5810 	uint32_t v = secure ? FRAME_TMZ : 0;
5811 
5812 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5813 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5814 }
5815 
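/* Emit a CONTEXT_CONTROL packet. The CE meta-data is re-emitted first
 * (restoring the saved payload when resuming a preempted IB on bare metal);
 * the load bits then tell the CP which state to reload on a context switch.
 */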
5816 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5817 {
5818 	uint32_t dw2 = 0;
5819 
5820 	gfx_v9_0_ring_emit_ce_meta(ring,
5821 				   (!amdgpu_sriov_vf(ring->adev) &&
5822 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5823 
5824 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5825 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5826 		/* set load_global_config & load_global_uconfig */
5827 		dw2 |= 0x8001;
5828 		/* set load_cs_sh_regs */
5829 		dw2 |= 0x01000000;
5830 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5831 		dw2 |= 0x10002;
5832 
5833 		/* set load_ce_ram if preamble presented */
5834 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5835 			dw2 |= 0x10000000;
5836 	} else {
5837 		/* still load_ce_ram if this is the first time the preamble is
5838 		 * presented, even though no context switch happens.
5839 		 */
5840 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5841 			dw2 |= 0x10000000;
5842 	}
5843 
5844 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5845 	amdgpu_ring_write(ring, dw2);
5846 	amdgpu_ring_write(ring, 0);
5847 }
5848 
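/* Emit a COND_EXEC packet referencing @addr: the packets that follow are
 * skipped while the value at @addr is zero. Returns the ring offset of the
 * size dword so the caller can patch in the real packet count later.
 */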
5849 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5850 						  uint64_t addr)
5851 {
5852 	unsigned ret;
5853 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5854 	amdgpu_ring_write(ring, lower_32_bits(addr));
5855 	amdgpu_ring_write(ring, upper_32_bits(addr));
5856 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5857 	amdgpu_ring_write(ring, 0);
5858 	ret = ring->wptr & ring->buf_mask;
5859 	/* patch dummy value later */
5860 	amdgpu_ring_write(ring, 0);
5861 	return ret;
5862 }
5863 
5864 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5865 				    uint32_t reg_val_offs)
5866 {
5867 	struct amdgpu_device *adev = ring->adev;
5868 
5869 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5870 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5871 				(5 << 8) |	/* dst: memory */
5872 				(1 << 20));	/* write confirm */
5873 	amdgpu_ring_write(ring, reg);
5874 	amdgpu_ring_write(ring, 0);
5875 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5876 				reg_val_offs * 4));
5877 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5878 				reg_val_offs * 4));
5879 }
5880 
5881 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5882 				    uint32_t val)
5883 {
5884 	uint32_t cmd = 0;
5885 
5886 	switch (ring->funcs->type) {
5887 	case AMDGPU_RING_TYPE_GFX:
5888 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5889 		break;
5890 	case AMDGPU_RING_TYPE_KIQ:
5891 		cmd = (1 << 16); /* no inc addr */
5892 		break;
5893 	default:
5894 		cmd = WR_CONFIRM;
5895 		break;
5896 	}
5897 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5898 	amdgpu_ring_write(ring, cmd);
5899 	amdgpu_ring_write(ring, reg);
5900 	amdgpu_ring_write(ring, 0);
5901 	amdgpu_ring_write(ring, val);
5902 }
5903 
5904 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5905 					uint32_t val, uint32_t mask)
5906 {
5907 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5908 }
5909 
5910 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5911 						  uint32_t reg0, uint32_t reg1,
5912 						  uint32_t ref, uint32_t mask)
5913 {
5914 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5915 	struct amdgpu_device *adev = ring->adev;
5916 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5917 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5918 
5919 	if (fw_version_ok)
5920 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5921 				      ref, mask, 0x20);
5922 	else
5923 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5924 							   ref, mask);
5925 }
5926 
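/* Attempt soft recovery of a hung ring by issuing an SQ_CMD that targets
 * the waves belonging to @vmid, wrapped in RLC safe mode.
 */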
5927 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5928 {
5929 	struct amdgpu_device *adev = ring->adev;
5930 	uint32_t value = 0;
5931 
5932 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5933 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5934 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5935 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5936 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5937 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5938 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5939 }
5940 
5941 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5942 						 enum amdgpu_interrupt_state state)
5943 {
5944 	switch (state) {
5945 	case AMDGPU_IRQ_STATE_DISABLE:
5946 	case AMDGPU_IRQ_STATE_ENABLE:
5947 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5948 			       TIME_STAMP_INT_ENABLE,
5949 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5950 		break;
5951 	default:
5952 		break;
5953 	}
5954 }
5955 
5956 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5957 						     int me, int pipe,
5958 						     enum amdgpu_interrupt_state state)
5959 {
5960 	u32 mec_int_cntl, mec_int_cntl_reg;
5961 
5962 	/*
5963 	 * amdgpu controls only the first MEC. That's why this function only
5964 	 * handles the setting of interrupts for this specific MEC. All other
5965 	 * pipes' interrupts are set by amdkfd.
5966 	 */
5967 
5968 	if (me == 1) {
5969 		switch (pipe) {
5970 		case 0:
5971 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5972 			break;
5973 		case 1:
5974 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5975 			break;
5976 		case 2:
5977 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5978 			break;
5979 		case 3:
5980 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5981 			break;
5982 		default:
5983 			DRM_DEBUG("invalid pipe %d\n", pipe);
5984 			return;
5985 		}
5986 	} else {
5987 		DRM_DEBUG("invalid me %d\n", me);
5988 		return;
5989 	}
5990 
5991 	switch (state) {
5992 	case AMDGPU_IRQ_STATE_DISABLE:
5993 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5994 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5995 					     TIME_STAMP_INT_ENABLE, 0);
5996 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5997 		break;
5998 	case AMDGPU_IRQ_STATE_ENABLE:
5999 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6000 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6001 					     TIME_STAMP_INT_ENABLE, 1);
6002 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6003 		break;
6004 	default:
6005 		break;
6006 	}
6007 }
6008 
6009 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6010 				     int me, int pipe)
6011 {
6012 	/*
6013 	 * amdgpu controls only the first MEC. That's why this function only
6014 	 * handles the setting of interrupts for this specific MEC. All other
6015 	 * pipes' interrupts are set by amdkfd.
6016 	 */
6017 	if (me != 1)
6018 		return 0;
6019 
6020 	switch (pipe) {
6021 	case 0:
6022 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6023 	case 1:
6024 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6025 	case 2:
6026 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6027 	case 3:
6028 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6029 	default:
6030 		return 0;
6031 	}
6032 }
6033 
6034 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6035 					     struct amdgpu_irq_src *source,
6036 					     unsigned type,
6037 					     enum amdgpu_interrupt_state state)
6038 {
6039 	u32 cp_int_cntl_reg, cp_int_cntl;
6040 	int i, j;
6041 
6042 	switch (state) {
6043 	case AMDGPU_IRQ_STATE_DISABLE:
6044 	case AMDGPU_IRQ_STATE_ENABLE:
6045 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6046 			       PRIV_REG_INT_ENABLE,
6047 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6048 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6049 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6050 				/* MECs start at 1 */
6051 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6052 
6053 				if (cp_int_cntl_reg) {
6054 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6055 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6056 								    PRIV_REG_INT_ENABLE,
6057 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6058 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6059 				}
6060 			}
6061 		}
6062 		break;
6063 	default:
6064 		break;
6065 	}
6066 
6067 	return 0;
6068 }
6069 
6070 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6071 					   struct amdgpu_irq_src *source,
6072 					   unsigned type,
6073 					   enum amdgpu_interrupt_state state)
6074 {
6075 	u32 cp_int_cntl_reg, cp_int_cntl;
6076 	int i, j;
6077 
6078 	switch (state) {
6079 	case AMDGPU_IRQ_STATE_DISABLE:
6080 	case AMDGPU_IRQ_STATE_ENABLE:
6081 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6082 			       OPCODE_ERROR_INT_ENABLE,
6083 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6084 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6085 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6086 				/* MECs start at 1 */
6087 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6088 
6089 				if (cp_int_cntl_reg) {
6090 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6091 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6092 								    OPCODE_ERROR_INT_ENABLE,
6093 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6094 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6095 				}
6096 			}
6097 		}
6098 		break;
6099 	default:
6100 		break;
6101 	}
6102 
6103 	return 0;
6104 }
6105 
6106 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6107 					      struct amdgpu_irq_src *source,
6108 					      unsigned type,
6109 					      enum amdgpu_interrupt_state state)
6110 {
6111 	switch (state) {
6112 	case AMDGPU_IRQ_STATE_DISABLE:
6113 	case AMDGPU_IRQ_STATE_ENABLE:
6114 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6115 			       PRIV_INSTR_INT_ENABLE,
6116 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6117 		break;
6118 	default:
6119 		break;
6120 	}
6121 
6122 	return 0;
6123 }
6124 
6125 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
6126 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6127 			CP_ECC_ERROR_INT_ENABLE, 1)
6128 
6129 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
6130 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6131 			CP_ECC_ERROR_INT_ENABLE, 0)
6132 
6133 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6134 					      struct amdgpu_irq_src *source,
6135 					      unsigned type,
6136 					      enum amdgpu_interrupt_state state)
6137 {
6138 	switch (state) {
6139 	case AMDGPU_IRQ_STATE_DISABLE:
6140 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6141 				CP_ECC_ERROR_INT_ENABLE, 0);
6142 		DISABLE_ECC_ON_ME_PIPE(1, 0);
6143 		DISABLE_ECC_ON_ME_PIPE(1, 1);
6144 		DISABLE_ECC_ON_ME_PIPE(1, 2);
6145 		DISABLE_ECC_ON_ME_PIPE(1, 3);
6146 		break;
6147 
6148 	case AMDGPU_IRQ_STATE_ENABLE:
6149 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6150 				CP_ECC_ERROR_INT_ENABLE, 1);
6151 		ENABLE_ECC_ON_ME_PIPE(1, 0);
6152 		ENABLE_ECC_ON_ME_PIPE(1, 1);
6153 		ENABLE_ECC_ON_ME_PIPE(1, 2);
6154 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6155 		break;
6156 	default:
6157 		break;
6158 	}
6159 
6160 	return 0;
6161 }
6162 
6163 
6164 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6165 					    struct amdgpu_irq_src *src,
6166 					    unsigned type,
6167 					    enum amdgpu_interrupt_state state)
6168 {
6169 	switch (type) {
6170 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6171 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6172 		break;
6173 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6174 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6175 		break;
6176 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6177 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6178 		break;
6179 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6180 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6181 		break;
6182 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6183 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6184 		break;
6185 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6186 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6187 		break;
6188 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6189 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6190 		break;
6191 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6192 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6193 		break;
6194 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6195 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6196 		break;
6197 	default:
6198 		break;
6199 	}
6200 	return 0;
6201 }
6202 
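/* End-of-pipe interrupt handler: decode me/pipe/queue from the IV ring_id
 * and forward fence processing to the matching gfx ring (or the software
 * rings behind the mux when MCBP is enabled) or compute ring.
 */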
6203 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6204 			    struct amdgpu_irq_src *source,
6205 			    struct amdgpu_iv_entry *entry)
6206 {
6207 	int i;
6208 	u8 me_id, pipe_id, queue_id;
6209 	struct amdgpu_ring *ring;
6210 
6211 	DRM_DEBUG("IH: CP EOP\n");
6212 	me_id = (entry->ring_id & 0x0c) >> 2;
6213 	pipe_id = (entry->ring_id & 0x03) >> 0;
6214 	queue_id = (entry->ring_id & 0x70) >> 4;
6215 
6216 	switch (me_id) {
6217 	case 0:
6218 		if (adev->gfx.num_gfx_rings) {
6219 			if (!adev->gfx.mcbp) {
6220 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6221 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6222 				/* Fence signals are handled on the software rings */
6223 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6224 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6225 			}
6226 		}
6227 		break;
6228 	case 1:
6229 	case 2:
6230 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6231 			ring = &adev->gfx.compute_ring[i];
6232 			/* Per-queue interrupt is supported for MEC starting from VI.
6233 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6234 			 */
6235 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6236 				amdgpu_fence_process(ring);
6237 		}
6238 		break;
6239 	}
6240 	return 0;
6241 }
6242 
6243 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6244 			   struct amdgpu_iv_entry *entry)
6245 {
6246 	u8 me_id, pipe_id, queue_id;
6247 	struct amdgpu_ring *ring;
6248 	int i;
6249 
6250 	me_id = (entry->ring_id & 0x0c) >> 2;
6251 	pipe_id = (entry->ring_id & 0x03) >> 0;
6252 	queue_id = (entry->ring_id & 0x70) >> 4;
6253 
6254 	switch (me_id) {
6255 	case 0:
6256 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6257 		break;
6258 	case 1:
6259 	case 2:
6260 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6261 			ring = &adev->gfx.compute_ring[i];
6262 			if (ring->me == me_id && ring->pipe == pipe_id &&
6263 			    ring->queue == queue_id)
6264 				drm_sched_fault(&ring->sched);
6265 		}
6266 		break;
6267 	}
6268 }
6269 
6270 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6271 				 struct amdgpu_irq_src *source,
6272 				 struct amdgpu_iv_entry *entry)
6273 {
6274 	DRM_ERROR("Illegal register access in command stream\n");
6275 	gfx_v9_0_fault(adev, entry);
6276 	return 0;
6277 }
6278 
6279 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6280 			       struct amdgpu_irq_src *source,
6281 			       struct amdgpu_iv_entry *entry)
6282 {
6283 	DRM_ERROR("Illegal opcode in command stream\n");
6284 	gfx_v9_0_fault(adev, entry);
6285 	return 0;
6286 }
6287 
6288 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6289 				  struct amdgpu_irq_src *source,
6290 				  struct amdgpu_iv_entry *entry)
6291 {
6292 	DRM_ERROR("Illegal instruction in command stream\n");
6293 	gfx_v9_0_fault(adev, entry);
6294 	return 0;
6295 }
6296 
6297 
6298 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6299 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6300 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6301 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6302 	},
6303 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6304 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6305 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6306 	},
6307 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6308 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6309 	  0, 0
6310 	},
6311 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6312 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6313 	  0, 0
6314 	},
6315 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6316 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6317 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6318 	},
6319 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6320 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6321 	  0, 0
6322 	},
6323 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6324 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6325 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6326 	},
6327 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6328 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6329 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6330 	},
6331 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6332 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6333 	  0, 0
6334 	},
6335 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6336 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6337 	  0, 0
6338 	},
6339 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6340 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6341 	  0, 0
6342 	},
6343 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6344 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6345 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6346 	},
6347 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6348 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6349 	  0, 0
6350 	},
6351 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6352 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6353 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6354 	},
6355 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6356 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6357 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6358 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6359 	},
6360 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6361 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6362 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6363 	  0, 0
6364 	},
6365 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6366 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6367 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6368 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6369 	},
6370 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6371 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6372 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6373 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6374 	},
6375 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6376 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6377 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6378 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6379 	},
6380 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6381 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6382 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6383 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6384 	},
6385 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6386 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6387 	  0, 0
6388 	},
6389 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6390 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6391 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6392 	},
6393 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6394 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6395 	  0, 0
6396 	},
6397 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6398 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6399 	  0, 0
6400 	},
6401 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6402 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6403 	  0, 0
6404 	},
6405 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6406 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6407 	  0, 0
6408 	},
6409 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6410 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6411 	  0, 0
6412 	},
6413 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6414 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6415 	  0, 0
6416 	},
6417 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6418 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6419 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6420 	},
6421 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6422 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6423 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6424 	},
6425 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6426 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6427 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6428 	},
6429 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6430 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6431 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6432 	},
6433 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6434 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6435 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6436 	},
6437 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6438 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6439 	  0, 0
6440 	},
6441 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6442 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6443 	  0, 0
6444 	},
6445 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6446 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6447 	  0, 0
6448 	},
6449 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6450 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6451 	  0, 0
6452 	},
6453 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6454 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6455 	  0, 0
6456 	},
6457 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6458 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6459 	  0, 0
6460 	},
6461 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6462 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6463 	  0, 0
6464 	},
6465 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6466 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6467 	  0, 0
6468 	},
6469 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6470 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6471 	  0, 0
6472 	},
6473 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6474 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6475 	  0, 0
6476 	},
6477 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6478 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6479 	  0, 0
6480 	},
6481 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6482 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6483 	  0, 0
6484 	},
6485 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6486 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6487 	  0, 0
6488 	},
6489 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6490 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6491 	  0, 0
6492 	},
6493 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6494 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6495 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6496 	},
6497 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6498 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6499 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6500 	},
6501 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6502 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6503 	  0, 0
6504 	},
6505 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6506 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6507 	  0, 0
6508 	},
6509 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6510 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6511 	  0, 0
6512 	},
6513 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6514 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6515 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6516 	},
6517 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6518 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6519 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6520 	},
6521 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6522 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6523 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6524 	},
6525 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6526 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6527 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6528 	},
6529 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6530 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6531 	  0, 0
6532 	},
6533 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6534 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6535 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6536 	},
6537 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6538 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6539 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6540 	},
6541 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6542 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6543 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6544 	},
6545 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6546 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6547 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6548 	},
6549 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6550 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6551 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6552 	},
6553 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6554 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6555 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6556 	},
6557 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6558 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6559 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6560 	},
6561 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6562 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6563 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6564 	},
6565 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6566 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6567 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6568 	},
6569 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6570 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6571 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6572 	},
6573 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6574 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6575 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6576 	},
6577 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6578 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6579 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6580 	},
6581 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6582 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6583 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6584 	},
6585 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6586 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6587 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6588 	},
6589 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6590 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6591 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6592 	},
6593 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6594 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6595 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6596 	},
6597 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6598 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6599 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6600 	},
6601 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6602 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6603 	  0, 0
6604 	},
6605 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6606 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6607 	  0, 0
6608 	},
6609 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6610 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6611 	  0, 0
6612 	},
6613 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6614 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6615 	  0, 0
6616 	},
6617 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6618 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6619 	  0, 0
6620 	},
6621 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6622 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6623 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6624 	},
6625 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6626 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6627 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6628 	},
6629 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6630 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6631 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6632 	},
6633 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6634 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6635 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6636 	},
6637 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6638 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6639 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6640 	},
6641 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6642 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6643 	  0, 0
6644 	},
6645 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6646 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6647 	  0, 0
6648 	},
6649 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6650 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6651 	  0, 0
6652 	},
6653 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6654 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6655 	  0, 0
6656 	},
6657 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6658 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6659 	  0, 0
6660 	},
6661 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6662 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6663 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6664 	},
6665 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6666 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6667 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6668 	},
6669 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6670 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6671 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6672 	},
6673 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6674 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6675 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6676 	},
6677 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6678 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6679 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6680 	},
6681 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6682 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6683 	  0, 0
6684 	},
6685 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6686 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6687 	  0, 0
6688 	},
6689 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6690 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6691 	  0, 0
6692 	},
6693 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6694 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6695 	  0, 0
6696 	},
6697 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6698 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6699 	  0, 0
6700 	},
6701 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6702 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6703 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6704 	},
6705 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6706 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6707 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6708 	},
6709 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6710 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6711 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6712 	},
6713 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6714 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6715 	  0, 0
6716 	},
6717 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6718 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6719 	  0, 0
6720 	},
6721 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6722 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6723 	  0, 0
6724 	},
6725 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6726 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6727 	  0, 0
6728 	},
6729 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6730 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6731 	  0, 0
6732 	},
6733 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6734 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6735 	  0, 0
6736 	}
6737 };
6738 
6739 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6740 				     void *inject_if, uint32_t instance_mask)
6741 {
6742 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6743 	int ret;
6744 	struct ta_ras_trigger_error_input block_info = { 0 };
6745 
6746 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6747 		return -EINVAL;
6748 
6749 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6750 		return -EINVAL;
6751 
6752 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6753 		return -EPERM;
6754 
6755 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6756 	      info->head.type)) {
6757 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6758 			ras_gfx_subblocks[info->head.sub_block_index].name,
6759 			info->head.type);
6760 		return -EPERM;
6761 	}
6762 
6763 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6764 	      info->head.type)) {
6765 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6766 			ras_gfx_subblocks[info->head.sub_block_index].name,
6767 			info->head.type);
6768 		return -EPERM;
6769 	}
6770 
6771 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6772 	block_info.sub_block_index =
6773 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6774 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6775 	block_info.address = info->address;
6776 	block_info.value = info->value;
6777 
6778 	mutex_lock(&adev->grbm_idx_mutex);
6779 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6780 	mutex_unlock(&adev->grbm_idx_mutex);
6781 
6782 	return ret;
6783 }
6784 
6785 static const char * const vml2_mems[] = {
6786 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6787 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6788 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6789 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6790 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6791 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6792 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6793 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6794 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6795 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6796 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6797 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6798 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6799 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6800 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6801 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6802 };
6803 
6804 static const char * const vml2_walker_mems[] = {
6805 	"UTC_VML2_CACHE_PDE0_MEM0",
6806 	"UTC_VML2_CACHE_PDE0_MEM1",
6807 	"UTC_VML2_CACHE_PDE1_MEM0",
6808 	"UTC_VML2_CACHE_PDE1_MEM1",
6809 	"UTC_VML2_CACHE_PDE2_MEM0",
6810 	"UTC_VML2_CACHE_PDE2_MEM1",
6811 	"UTC_VML2_RDIF_LOG_FIFO",
6812 };
6813 
6814 static const char * const atc_l2_cache_2m_mems[] = {
6815 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6816 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6817 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6818 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6819 };
6820 
6821 static const char * const atc_l2_cache_4k_mems[] = {
6822 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6823 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6824 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6825 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6826 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6827 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6828 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6829 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6830 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6831 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6832 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6833 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6834 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6835 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6836 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6837 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6838 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6839 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6840 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6841 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6842 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6843 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6844 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6845 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6846 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6847 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6848 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6849 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6850 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6851 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6852 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6853 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6854 };
6855 
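/* Walk the VML2, VML2 walker and ATC L2 EDC counters, logging every
 * non-zero instance and accumulating correctable (SEC) and uncorrectable
 * (DED) error counts into @err_data, then reset the index registers.
 */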
6856 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6857 					 struct ras_err_data *err_data)
6858 {
6859 	uint32_t i, data;
6860 	uint32_t sec_count, ded_count;
6861 
6862 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6863 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6864 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6865 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6866 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6867 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6868 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6869 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6870 
6871 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6872 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6873 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6874 
6875 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6876 		if (sec_count) {
6877 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6878 				"SEC %d\n", i, vml2_mems[i], sec_count);
6879 			err_data->ce_count += sec_count;
6880 		}
6881 
6882 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6883 		if (ded_count) {
6884 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6885 				"DED %d\n", i, vml2_mems[i], ded_count);
6886 			err_data->ue_count += ded_count;
6887 		}
6888 	}
6889 
6890 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6891 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6892 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6893 
6894 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6895 						SEC_COUNT);
6896 		if (sec_count) {
6897 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6898 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6899 			err_data->ce_count += sec_count;
6900 		}
6901 
6902 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6903 						DED_COUNT);
6904 		if (ded_count) {
6905 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6906 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6907 			err_data->ue_count += ded_count;
6908 		}
6909 	}
6910 
6911 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6912 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6913 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6914 
6915 		sec_count = (data & 0x00006000L) >> 0xd;
6916 		if (sec_count) {
6917 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6918 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6919 				sec_count);
6920 			err_data->ce_count += sec_count;
6921 		}
6922 	}
6923 
6924 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6925 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6926 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6927 
6928 		sec_count = (data & 0x00006000L) >> 0xd;
6929 		if (sec_count) {
6930 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6931 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6932 				sec_count);
6933 			err_data->ce_count += sec_count;
6934 		}
6935 
6936 		ded_count = (data & 0x00018000L) >> 0xf;
6937 		if (ded_count) {
6938 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6939 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6940 				ded_count);
6941 			err_data->ue_count += ded_count;
6942 		}
6943 	}
6944 
6945 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6946 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6947 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6948 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6949 
6950 	return 0;
6951 }
6952 
6953 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6954 	const struct soc15_reg_entry *reg,
6955 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6956 	uint32_t *sec_count, uint32_t *ded_count)
6957 {
6958 	uint32_t i;
6959 	uint32_t sec_cnt, ded_cnt;
6960 
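	/* Walk the RAS field table and accumulate SEC/DED counts from every
	 * field that belongs to this register (matched by offset, segment
	 * and instance).
	 */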
6961 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6962 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6963 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6964 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6965 			continue;
6966 
6967 		sec_cnt = (value &
6968 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6969 				gfx_v9_0_ras_fields[i].sec_count_shift;
6970 		if (sec_cnt) {
6971 			dev_info(adev->dev, "GFX SubBlock %s, "
6972 				"Instance[%d][%d], SEC %d\n",
6973 				gfx_v9_0_ras_fields[i].name,
6974 				se_id, inst_id,
6975 				sec_cnt);
6976 			*sec_count += sec_cnt;
6977 		}
6978 
6979 		ded_cnt = (value &
6980 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6981 				gfx_v9_0_ras_fields[i].ded_count_shift;
6982 		if (ded_cnt) {
6983 			dev_info(adev->dev, "GFX SubBlock %s, "
6984 				"Instance[%d][%d], DED %d\n",
6985 				gfx_v9_0_ras_fields[i].name,
6986 				se_id, inst_id,
6987 				ded_cnt);
6988 			*ded_count += ded_cnt;
6989 		}
6990 	}
6991 
6992 	return 0;
6993 }
6994 
6995 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6996 {
6997 	int i, j, k;
6998 
6999 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7000 		return;
7001 
7002 	/* read back registers to clear the counters */
7003 	mutex_lock(&adev->grbm_idx_mutex);
7004 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7005 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7006 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7007 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7008 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7009 			}
7010 		}
7011 	}
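	/* 0xe0000000 sets the SE/SH/instance broadcast bits of
	 * GRBM_GFX_INDEX, restoring broadcast addressing after the
	 * per-instance reads above.
	 */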
7012 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7013 	mutex_unlock(&adev->grbm_idx_mutex);
7014 
7015 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7016 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7017 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7018 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7019 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7020 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7021 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7022 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7023 
7024 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7025 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7026 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7027 	}
7028 
7029 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7030 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7031 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7032 	}
7033 
7034 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7035 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7036 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7037 	}
7038 
7039 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7040 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7041 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7042 	}
7043 
7044 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7045 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7046 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7047 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7048 }
7049 
7050 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7051 					  void *ras_error_status)
7052 {
7053 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7054 	uint32_t sec_count = 0, ded_count = 0;
7055 	uint32_t i, j, k;
7056 	uint32_t reg_value;
7057 
7058 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7059 		return;
7060 
7061 	err_data->ue_count = 0;
7062 	err_data->ce_count = 0;
7063 
7064 	mutex_lock(&adev->grbm_idx_mutex);
7065 
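	/* For each EDC counter register, iterate over its SE and instance
	 * counts, select the target via GRBM and decode any non-zero value
	 * into the SEC/DED totals.
	 */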
7066 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7067 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7068 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7069 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7070 				reg_value =
7071 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7072 				if (reg_value)
7073 					gfx_v9_0_ras_error_count(adev,
7074 						&gfx_v9_0_edc_counter_regs[i],
7075 						j, k, reg_value,
7076 						&sec_count, &ded_count);
7077 			}
7078 		}
7079 	}
7080 
7081 	err_data->ce_count += sec_count;
7082 	err_data->ue_count += ded_count;
7083 
7084 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7085 	mutex_unlock(&adev->grbm_idx_mutex);
7086 
7087 	gfx_v9_0_query_utc_edc_status(adev, err_data);
7088 }
7089 
7090 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7091 {
7092 	const unsigned int cp_coher_cntl =
7093 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7094 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7095 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7096 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7097 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
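	/* These coherency flags invalidate the SH instruction and scalar
	 * (K$) caches, TCL1 and the TC (L2) cache, and write back dirty
	 * TC data.
	 */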
7098 
7099 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
7100 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7101 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7102 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7103 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7104 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7105 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7106 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7107 }
7108 
7109 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7110 					uint32_t pipe, bool enable)
7111 {
7112 	struct amdgpu_device *adev = ring->adev;
7113 	uint32_t val;
7114 	uint32_t wcl_cs_reg;
7115 
7116 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
7117 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7118 
7119 	switch (pipe) {
7120 	case 0:
7121 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7122 		break;
7123 	case 1:
7124 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7125 		break;
7126 	case 2:
7127 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7128 		break;
7129 	case 3:
7130 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7131 		break;
7132 	default:
7133 		DRM_DEBUG("invalid pipe %d\n", pipe);
7134 		return;
7135 	}
7136 
7137 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7138 
7139 }
7140 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7141 {
7142 	struct amdgpu_device *adev = ring->adev;
7143 	uint32_t val;
7144 	int i;
7145 
7146 
7147 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
7148 	 * limit the number of gfx waves. Setting the low 5 bits (0x1f)
7149 	 * makes sure gfx only gets around 25% of the GPU resources.
7150 	 */
7151 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7152 	amdgpu_ring_emit_wreg(ring,
7153 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7154 			      val);
7155 
7156 	/* Restrict waves for normal/low priority compute queues as well
7157 	 * to get the best QoS for high priority compute jobs.
7158 	 *
7159 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
7160 	 */
7161 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7162 		if (i != ring->pipe)
7163 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7164 
7165 	}
7166 }
7167 
7168 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7169 {
7170 	int i;
7171 
7172 	/* Header itself is a NOP packet */
7173 	if (num_nop == 1) {
7174 		amdgpu_ring_write(ring, ring->funcs->nop);
7175 		return;
7176 	}
7177 
7178 	/* Max HW optimization up to 0x3ffe; the remaining NOPs are emitted one at a time */
7179 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
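	/* A type-3 packet's count field covers count + 1 body dwords, so
	 * this header lets the CP consume the num_nop - 1 filler dwords
	 * written below as payload, up to the 0x3ffe limit.
	 */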
7180 
7181 	/* Header is at index 0, followed by num_nop - 1 NOP packets */
7182 	for (i = 1; i < num_nop; i++)
7183 		amdgpu_ring_write(ring, ring->funcs->nop);
7184 }
7185 
7186 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7187 {
7188 	struct amdgpu_device *adev = ring->adev;
7189 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7190 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7191 	unsigned long flags;
7192 	u32 tmp;
7193 	int r;
7194 
7195 	if (amdgpu_sriov_vf(adev))
7196 		return -EINVAL;
7197 
7198 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7199 		return -EINVAL;
7200 
7201 	spin_lock_irqsave(&kiq->ring_lock, flags);
7202 
7203 	if (amdgpu_ring_alloc(kiq_ring, 5)) {
7204 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7205 		return -ENOMEM;
7206 	}
7207 
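	/* Request the CP to reset this VMID's work by setting its bit in
	 * the CP_VMID_RESET RESET_REQUEST field, written via the KIQ ring.
	 */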
7208 	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7209 	gfx_v9_0_ring_emit_wreg(kiq_ring,
7210 				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7211 	amdgpu_ring_commit(kiq_ring);
7212 
7213 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7214 
7215 	r = amdgpu_ring_test_ring(kiq_ring);
7216 	if (r)
7217 		return r;
7218 
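	/* On the gfx ring itself: signal the fence, wait for CP_VMID_RESET
	 * to read back zero, then clear the register.
	 */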
7219 	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7220 		return -ENOMEM;
7221 	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7222 				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7223 	gfx_v9_0_ring_emit_reg_wait(ring,
7224 				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7225 	gfx_v9_0_ring_emit_wreg(ring,
7226 				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7227 
7228 	return amdgpu_ring_test_ring(ring);
7229 }
7230 
7231 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7232 			      unsigned int vmid)
7233 {
7234 	struct amdgpu_device *adev = ring->adev;
7235 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7236 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7237 	unsigned long flags;
7238 	int i, r;
7239 
7240 	if (!adev->debug_exp_resets &&
7241 	    !adev->gfx.num_gfx_rings)
7242 		return -EINVAL;
7243 
7244 	if (amdgpu_sriov_vf(adev))
7245 		return -EINVAL;
7246 
7247 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7248 		return -EINVAL;
7249 
7250 	spin_lock_irqsave(&kiq->ring_lock, flags);
7251 
7252 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7253 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7254 		return -ENOMEM;
7255 	}
7256 
7257 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7258 				   0, 0);
7259 	amdgpu_ring_commit(kiq_ring);
7260 
7261 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7262 
7263 	r = amdgpu_ring_test_ring(kiq_ring);
7264 	if (r)
7265 		return r;
7266 
7267 	/* make sure dequeue is complete */
7268 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7269 	mutex_lock(&adev->srbm_mutex);
7270 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7271 	for (i = 0; i < adev->usec_timeout; i++) {
7272 		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7273 			break;
7274 		udelay(1);
7275 	}
7276 	if (i >= adev->usec_timeout)
7277 		r = -ETIMEDOUT;
7278 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7279 	mutex_unlock(&adev->srbm_mutex);
7280 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7281 	if (r) {
7282 		dev_err(adev->dev, "fail to wait on hqd deactive\n");
7283 		return r;
7284 	}
7285 
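	/* Re-initialize the MQD for this queue and remap it through the
	 * KIQ now that the HQD has been deactivated.
	 */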
7286 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
7287 	if (unlikely(r != 0)) {
7288 		dev_err(adev->dev, "failed to reserve mqd_obj\n");
7289 		return r;
7290 	}
7291 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7292 	if (!r) {
7293 		r = gfx_v9_0_kcq_init_queue(ring, true);
7294 		amdgpu_bo_kunmap(ring->mqd_obj);
7295 		ring->mqd_ptr = NULL;
7296 	}
7297 	amdgpu_bo_unreserve(ring->mqd_obj);
7298 	if (r) {
7299 		dev_err(adev->dev, "fail to unresv mqd_obj\n");
7300 		return r;
7301 	}
7302 	spin_lock_irqsave(&kiq->ring_lock, flags);
7303 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7304 	if (r) {
7305 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7306 		return -ENOMEM;
7307 	}
7308 	kiq->pmf->kiq_map_queues(kiq_ring, ring);
7309 	amdgpu_ring_commit(kiq_ring);
7310 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7311 	r = amdgpu_ring_test_ring(kiq_ring);
7312 	if (r) {
7313 		DRM_ERROR("fail to remap queue\n");
7314 		return r;
7315 	}
7316 	return amdgpu_ring_test_ring(ring);
7317 }
7318 
7319 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7320 {
7321 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7322 	uint32_t i, j, k, reg, index = 0;
7323 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7324 
7325 	if (!adev->gfx.ip_dump_core)
7326 		return;
7327 
7328 	for (i = 0; i < reg_count; i++)
7329 		drm_printf(p, "%-50s \t 0x%08x\n",
7330 			   gc_reg_list_9[i].reg_name,
7331 			   adev->gfx.ip_dump_core[i]);
7332 
7333 	/* print compute queue registers for all instances */
7334 	if (!adev->gfx.ip_dump_compute_queues)
7335 		return;
7336 
7337 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7338 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7339 		   adev->gfx.mec.num_mec,
7340 		   adev->gfx.mec.num_pipe_per_mec,
7341 		   adev->gfx.mec.num_queue_per_pipe);
7342 
7343 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7344 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7345 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7346 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7347 				for (reg = 0; reg < reg_count; reg++) {
7348 					drm_printf(p, "%-50s \t 0x%08x\n",
7349 						   gc_cp_reg_list_9[reg].reg_name,
7350 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7351 				}
7352 				index += reg_count;
7353 			}
7354 		}
7355 	}
7356 
7357 }
7358 
7359 static void gfx_v9_ip_dump(void *handle)
7360 {
7361 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7362 	uint32_t i, j, k, reg, index = 0;
7363 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7364 
7365 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7366 		return;
7367 
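	/* Keep GFXOFF disabled while reading registers so the GC block
	 * stays powered up.
	 */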
7368 	amdgpu_gfx_off_ctrl(adev, false);
7369 	for (i = 0; i < reg_count; i++)
7370 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7371 	amdgpu_gfx_off_ctrl(adev, true);
7372 
7373 	/* dump compute queue registers for all instances */
7374 	if (!adev->gfx.ip_dump_compute_queues)
7375 		return;
7376 
7377 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7378 	amdgpu_gfx_off_ctrl(adev, false);
7379 	mutex_lock(&adev->srbm_mutex);
7380 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7381 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7382 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7383 				/* ME0 is for GFX so start from 1 for CP */
7384 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7385 
7386 				for (reg = 0; reg < reg_count; reg++) {
7387 					adev->gfx.ip_dump_compute_queues[index + reg] =
7388 						RREG32(SOC15_REG_ENTRY_OFFSET(
7389 							gc_cp_reg_list_9[reg]));
7390 				}
7391 				index += reg_count;
7392 			}
7393 		}
7394 	}
7395 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7396 	mutex_unlock(&adev->srbm_mutex);
7397 	amdgpu_gfx_off_ctrl(adev, true);
7398 
7399 }
7400 
7401 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7402 {
7403 	/* Emit the cleaner shader */
7404 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7405 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7406 }
7407 
7408 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7409 	.name = "gfx_v9_0",
7410 	.early_init = gfx_v9_0_early_init,
7411 	.late_init = gfx_v9_0_late_init,
7412 	.sw_init = gfx_v9_0_sw_init,
7413 	.sw_fini = gfx_v9_0_sw_fini,
7414 	.hw_init = gfx_v9_0_hw_init,
7415 	.hw_fini = gfx_v9_0_hw_fini,
7416 	.suspend = gfx_v9_0_suspend,
7417 	.resume = gfx_v9_0_resume,
7418 	.is_idle = gfx_v9_0_is_idle,
7419 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7420 	.soft_reset = gfx_v9_0_soft_reset,
7421 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7422 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7423 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7424 	.dump_ip_state = gfx_v9_ip_dump,
7425 	.print_ip_state = gfx_v9_ip_print,
7426 };
7427 
7428 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7429 	.type = AMDGPU_RING_TYPE_GFX,
7430 	.align_mask = 0xff,
7431 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7432 	.support_64bit_ptrs = true,
7433 	.secure_submission_supported = true,
7434 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7435 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7436 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7437 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7438 		5 +  /* COND_EXEC */
7439 		7 +  /* PIPELINE_SYNC */
7440 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7441 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7442 		2 + /* VM_FLUSH */
7443 		8 +  /* FENCE for VM_FLUSH */
7444 		20 + /* GDS switch */
7445 		4 + /* double SWITCH_BUFFER,
7446 		     * the first COND_EXEC jumps to the place just
7447 		     * prior to this double SWITCH_BUFFER */
7448 		5 + /* COND_EXEC */
7449 		7 +	 /*	HDP_flush */
7450 		4 +	 /*	VGT_flush */
7451 		14 + /*	CE_META */
7452 		31 + /*	DE_META */
7453 		3 + /* CNTX_CTRL */
7454 		5 + /* HDP_INVL */
7455 		8 + 8 + /* FENCE x2 */
7456 		2 + /* SWITCH_BUFFER */
7457 		7 + /* gfx_v9_0_emit_mem_sync */
7458 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7459 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7460 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7461 	.emit_fence = gfx_v9_0_ring_emit_fence,
7462 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7463 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7464 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7465 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7466 	.test_ring = gfx_v9_0_ring_test_ring,
7467 	.insert_nop = gfx_v9_ring_insert_nop,
7468 	.pad_ib = amdgpu_ring_generic_pad_ib,
7469 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7470 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7471 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7472 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7473 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7474 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7475 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7476 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7477 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7478 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7479 	.reset = gfx_v9_0_reset_kgq,
7480 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7481 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7482 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7483 };
7484 
7485 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7486 	.type = AMDGPU_RING_TYPE_GFX,
7487 	.align_mask = 0xff,
7488 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7489 	.support_64bit_ptrs = true,
7490 	.secure_submission_supported = true,
7491 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7492 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7493 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7494 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7495 		5 +  /* COND_EXEC */
7496 		7 +  /* PIPELINE_SYNC */
7497 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7498 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7499 		2 + /* VM_FLUSH */
7500 		8 +  /* FENCE for VM_FLUSH */
7501 		20 + /* GDS switch */
7502 		4 + /* double SWITCH_BUFFER,
7503 		     * the first COND_EXEC jumps to the place just
7504 		     * prior to this double SWITCH_BUFFER
7505 		     */
7506 		5 + /* COND_EXEC */
7507 		7 +	 /*	HDP_flush */
7508 		4 +	 /*	VGT_flush */
7509 		14 + /*	CE_META */
7510 		31 + /*	DE_META */
7511 		3 + /* CNTX_CTRL */
7512 		5 + /* HDP_INVL */
7513 		8 + 8 + /* FENCE x2 */
7514 		2 + /* SWITCH_BUFFER */
7515 		7 + /* gfx_v9_0_emit_mem_sync */
7516 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7517 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7518 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7519 	.emit_fence = gfx_v9_0_ring_emit_fence,
7520 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7521 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7522 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7523 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7524 	.test_ring = gfx_v9_0_ring_test_ring,
7525 	.test_ib = gfx_v9_0_ring_test_ib,
7526 	.insert_nop = gfx_v9_ring_insert_nop,
7527 	.pad_ib = amdgpu_ring_generic_pad_ib,
7528 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7529 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7530 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7531 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7532 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7533 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7534 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7535 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7536 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7537 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7538 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7539 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7540 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7541 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7542 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7543 };
7544 
7545 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7546 	.type = AMDGPU_RING_TYPE_COMPUTE,
7547 	.align_mask = 0xff,
7548 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7549 	.support_64bit_ptrs = true,
7550 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7551 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7552 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7553 	.emit_frame_size =
7554 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7555 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7556 		5 + /* hdp invalidate */
7557 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7558 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7559 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7560 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7561 		7 + /* gfx_v9_0_emit_mem_sync */
7562 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7563 		15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7564 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7565 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7566 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7567 	.emit_fence = gfx_v9_0_ring_emit_fence,
7568 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7569 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7570 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7571 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7572 	.test_ring = gfx_v9_0_ring_test_ring,
7573 	.test_ib = gfx_v9_0_ring_test_ib,
7574 	.insert_nop = gfx_v9_ring_insert_nop,
7575 	.pad_ib = amdgpu_ring_generic_pad_ib,
7576 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7577 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7578 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7579 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7580 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7581 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7582 	.reset = gfx_v9_0_reset_kcq,
7583 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7584 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7585 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7586 };
7587 
7588 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7589 	.type = AMDGPU_RING_TYPE_KIQ,
7590 	.align_mask = 0xff,
7591 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7592 	.support_64bit_ptrs = true,
7593 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7594 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7595 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7596 	.emit_frame_size =
7597 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7598 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7599 		5 + /* hdp invalidate */
7600 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7601 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7602 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7603 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7604 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7605 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7606 	.test_ring = gfx_v9_0_ring_test_ring,
7607 	.insert_nop = amdgpu_ring_insert_nop,
7608 	.pad_ib = amdgpu_ring_generic_pad_ib,
7609 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7610 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7611 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7612 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7613 };
7614 
7615 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7616 {
7617 	int i;
7618 
7619 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7620 
7621 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7622 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7623 
7624 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7625 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7626 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7627 	}
7628 
7629 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7630 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7631 }
7632 
7633 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7634 	.set = gfx_v9_0_set_eop_interrupt_state,
7635 	.process = gfx_v9_0_eop_irq,
7636 };
7637 
7638 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7639 	.set = gfx_v9_0_set_priv_reg_fault_state,
7640 	.process = gfx_v9_0_priv_reg_irq,
7641 };
7642 
7643 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7644 	.set = gfx_v9_0_set_bad_op_fault_state,
7645 	.process = gfx_v9_0_bad_op_irq,
7646 };
7647 
7648 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7649 	.set = gfx_v9_0_set_priv_inst_fault_state,
7650 	.process = gfx_v9_0_priv_inst_irq,
7651 };
7652 
7653 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7654 	.set = gfx_v9_0_set_cp_ecc_error_state,
7655 	.process = amdgpu_gfx_cp_ecc_error_irq,
7656 };
7657 
7658 
7659 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7660 {
7661 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7662 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7663 
7664 	adev->gfx.priv_reg_irq.num_types = 1;
7665 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7666 
7667 	adev->gfx.bad_op_irq.num_types = 1;
7668 	adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7669 
7670 	adev->gfx.priv_inst_irq.num_types = 1;
7671 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7672 
7673 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7674 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7675 }
7676 
7677 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7678 {
7679 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7680 	case IP_VERSION(9, 0, 1):
7681 	case IP_VERSION(9, 2, 1):
7682 	case IP_VERSION(9, 4, 0):
7683 	case IP_VERSION(9, 2, 2):
7684 	case IP_VERSION(9, 1, 0):
7685 	case IP_VERSION(9, 4, 1):
7686 	case IP_VERSION(9, 3, 0):
7687 	case IP_VERSION(9, 4, 2):
7688 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7689 		break;
7690 	default:
7691 		break;
7692 	}
7693 }
7694 
7695 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7696 {
7697 	/* init asic gds info */
7698 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7699 	case IP_VERSION(9, 0, 1):
7700 	case IP_VERSION(9, 2, 1):
7701 	case IP_VERSION(9, 4, 0):
7702 		adev->gds.gds_size = 0x10000;
7703 		break;
7704 	case IP_VERSION(9, 2, 2):
7705 	case IP_VERSION(9, 1, 0):
7706 	case IP_VERSION(9, 4, 1):
7707 		adev->gds.gds_size = 0x1000;
7708 		break;
7709 	case IP_VERSION(9, 4, 2):
7710 		/* aldebaran removed all the GDS internal memory;
7711 		 * only GWS opcodes (e.g. barrier, semaphore) are
7712 		 * supported in the kernel */
7713 		adev->gds.gds_size = 0;
7714 		break;
7715 	default:
7716 		adev->gds.gds_size = 0x10000;
7717 		break;
7718 	}
7719 
7720 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7721 	case IP_VERSION(9, 0, 1):
7722 	case IP_VERSION(9, 4, 0):
7723 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7724 		break;
7725 	case IP_VERSION(9, 2, 1):
7726 		adev->gds.gds_compute_max_wave_id = 0x27f;
7727 		break;
7728 	case IP_VERSION(9, 2, 2):
7729 	case IP_VERSION(9, 1, 0):
7730 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7731 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7732 		else
7733 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7734 		break;
7735 	case IP_VERSION(9, 4, 1):
7736 		adev->gds.gds_compute_max_wave_id = 0xfff;
7737 		break;
7738 	case IP_VERSION(9, 4, 2):
7739 		/* deprecated for Aldebaran, no usage at all */
7740 		adev->gds.gds_compute_max_wave_id = 0;
7741 		break;
7742 	default:
7743 		/* this really depends on the chip */
7744 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7745 		break;
7746 	}
7747 
7748 	adev->gds.gws_size = 64;
7749 	adev->gds.oa_size = 16;
7750 }
7751 
7752 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7753 						 u32 bitmap)
7754 {
7755 	u32 data;
7756 
7757 	if (!bitmap)
7758 		return;
7759 
7760 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7761 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7762 
7763 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7764 }
7765 
7766 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7767 {
7768 	u32 data, mask;
7769 
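	/* Combine the fuse-level and user-programmed inactive-CU masks,
	 * then invert against the per-SH CU mask to get the active CUs.
	 */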
7770 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7771 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7772 
7773 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7774 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7775 
7776 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7777 
7778 	return (~data) & mask;
7779 }
7780 
7781 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7782 				 struct amdgpu_cu_info *cu_info)
7783 {
7784 	int i, j, k, counter, active_cu_number = 0;
7785 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7786 	unsigned disable_masks[4 * 4];
7787 
7788 	if (!adev || !cu_info)
7789 		return -EINVAL;
7790 
7791 	/*
7792 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
7793 	 */
7794 	if (adev->gfx.config.max_shader_engines *
7795 		adev->gfx.config.max_sh_per_se > 16)
7796 		return -EINVAL;
7797 
7798 	amdgpu_gfx_parse_disable_cu(disable_masks,
7799 				    adev->gfx.config.max_shader_engines,
7800 				    adev->gfx.config.max_sh_per_se);
7801 
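	/* Walk every SE/SH pair: apply the user disable mask, read back the
	 * active-CU bitmap and count the always-on (AO) CUs.
	 */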
7802 	mutex_lock(&adev->grbm_idx_mutex);
7803 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7804 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7805 			mask = 1;
7806 			ao_bitmap = 0;
7807 			counter = 0;
7808 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7809 			gfx_v9_0_set_user_cu_inactive_bitmap(
7810 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7811 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7812 
7813 			/*
7814 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
7815 			 * is a 4x4 array, which suits Vega ASICs with their
7816 			 * 4*2 SE/SH layout.
7817 			 * But for Arcturus, the SE/SH layout is changed to 8*1.
7818 			 * To minimize the impact, we map it onto the current
7819 			 * bitmap array as below:
7820 			 *    SE4,SH0 --> bitmap[0][1]
7821 			 *    SE5,SH0 --> bitmap[1][1]
7822 			 *    SE6,SH0 --> bitmap[2][1]
7823 			 *    SE7,SH0 --> bitmap[3][1]
7824 			 */
7825 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7826 
7827 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7828 				if (bitmap & mask) {
7829 					if (counter < adev->gfx.config.max_cu_per_sh)
7830 						ao_bitmap |= mask;
7831 					counter++;
7832 				}
7833 				mask <<= 1;
7834 			}
7835 			active_cu_number += counter;
7836 			if (i < 2 && j < 2)
7837 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7838 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7839 		}
7840 	}
7841 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7842 	mutex_unlock(&adev->grbm_idx_mutex);
7843 
7844 	cu_info->number = active_cu_number;
7845 	cu_info->ao_cu_mask = ao_cu_mask;
7846 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7847 
7848 	return 0;
7849 }
7850 
7851 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7852 {
7853 	.type = AMD_IP_BLOCK_TYPE_GFX,
7854 	.major = 9,
7855 	.minor = 0,
7856 	.rev = 0,
7857 	.funcs = &gfx_v9_0_ip_funcs,
7858 };
7859