1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

/*
 * Reset-flow timeouts and retry counts. The PLDM_* variants are much longer;
 * PLDM presumably denotes an emulation platform — confirm against platform docs.
 */
#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3

/*
 * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
 * and the code relies on that value (for array size etc..) we define another value
 * for MAX faulty TPCs which reflects the cluster binning requirements
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

/* Bitmasks of a fully-populated device: 25 TPCs, 16 HIF/HMMUs, 10 decoders */
#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF

#define GAUDI2_NA_EVENT_CAUSE			0xFF	/* sentinel: no cause available */
/* Number of distinct cause bits reported per error/interrupt type */
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE	25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

#define KDMA_TIMEOUT_USEC			USEC_PER_SEC

/* Engine idle/halt checks: test status-register bits against their masks */
#define IS_DMA_IDLE(dma_core_sts0)	\
	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))

#define IS_DMA_HALTED(dma_core_sts1)	\
	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))

#define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

#define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))

/* QM is idle only when the QM itself, its ARC and the CGM are all idle */
#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))

#define PCIE_DEC_EN_MASK			0x300
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3
/* A decoder counts as idle when its SW work state is IDLE or PEND */
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)

/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
/* Field layout of the MMU range-invalidation register */
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
 * a 2 entries FIFO, and hence it is not enabled for it.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

/* One MSI-X entry per decoder interrupt (NRM + ABNRM, DCORE0_DEC0..SHARED_DEC1) */
#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

/* Stride between the engine-ID ranges of consecutive DCOREs */
#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
135 
136 /* RAZWI initiator coordinates */
/*
 * Extract the packed X/Y initiator coordinate field from an AXUSER register
 * value. Arguments are fully parenthesized so that expression arguments
 * (e.g. "a | b") expand with the intended precedence.
 */
#define RAZWI_GET_AXUSER_XY(x) \
	(((x) & 0xF8001FF0) >> 4)

/* Same extraction, but keeping only the low X/Y coordinate bits */
#define RAZWI_GET_AXUSER_LOW_XY(x) \
	(((x) & 0x00001FF0) >> 4)
142 
/*
 * Bit layout of the packed initiator-coordinate ID built by
 * RAZWI_INITIATOR_ID_X_Y(): low X in bits 0-4, low Y in bits 5-8,
 * high X in bits 23-27.
 * NOTE(review): "AXUER" looks like a typo of "AXUSER"; kept as-is because the
 * names are part of the driver-wide convention.
 */
#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF

#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F

/* Pack the low X/Y coordinates into an initiator ID */
#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))

/* Pack the high X coordinate into an initiator ID */
#define RAZWI_INITIATOR_ID_X_HIGH(x) \
		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

/* Full initiator ID: low X/Y combined with high X */
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))

#define PSOC_RAZWI_ENG_STR_SIZE			128
#define PSOC_RAZWI_MAX_ENG_PER_RTR		5

/* HW scrambles only bits 0-25 */
#define HW_UNSCRAMBLED_BITS_MASK		GENMASK_ULL(63, 26)

#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM		17
168 
/*
 * struct gaudi2_razwi_info - describes one possible RAZWI initiator.
 * Used by the tables below to translate a reported initiator coordinate
 * into an engine ID and a human-readable name.
 */
struct gaudi2_razwi_info {
	u32 axuser_xy;	/* packed coordinate ID, built via RAZWI_INITIATOR_ID_X_Y*() */
	u32 rtr_ctrl;	/* base address of the initiator's router control block */
	u16 eng_id;	/* engine ID; GAUDI2_ENGINE_ID_SIZE when there is no engine ID */
	char *eng_name;	/* human-readable initiator name */
};
175 
176 static struct gaudi2_razwi_info common_razwi_info[] = {
177 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
178 				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
179 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
180 				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
181 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
182 				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
183 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
184 				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
185 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
186 				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
187 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
188 				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
189 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
190 				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
191 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
192 				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
193 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
194 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
195 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
196 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
197 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
198 				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
199 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
200 				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
201 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
202 				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
203 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
204 				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
205 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
206 				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
207 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
208 				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
209 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
210 				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
211 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
212 				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
213 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
214 				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
215 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
216 				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
217 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
218 				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
219 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
220 				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
221 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
222 				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
223 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
224 				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
225 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
226 				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
227 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
228 				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
229 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
230 				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
231 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
232 				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
233 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
234 				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
235 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
236 				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
237 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
238 				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
239 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
240 				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
241 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
242 				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
243 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
244 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
245 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
246 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
247 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
248 				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
249 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
250 				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
251 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
252 				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
253 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
254 				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
255 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
256 				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
257 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
258 				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
259 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
260 				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
261 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
262 				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
263 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
264 				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
265 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
266 				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
267 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
268 				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
269 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
270 				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
271 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
272 				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
273 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
274 				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
275 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
276 				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
277 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
278 				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
279 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
280 				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
281 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
282 				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
283 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
284 				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
285 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
286 				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
287 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
288 				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
289 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
290 				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
291 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
292 				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
293 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
294 				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
295 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
296 				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
297 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
298 				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
299 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
301 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
303 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
305 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
307 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
309 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
311 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
312 				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
313 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
314 				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
315 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
317 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
319 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
321 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
323 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
325 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
327 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
328 				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
329 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
330 				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
331 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
333 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
334 				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
335 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
336 				GAUDI2_ENGINE_ID_PSOC, "CPU"},
337 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
338 				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
339 };
340 
/*
 * RAZWI initiator table for the MME sub-units (WAP, CTRL read/write, SBTE).
 * All sub-units of one MME report the same engine ID; the name string
 * identifies the specific sub-unit.
 */
static struct gaudi2_razwi_info mme_razwi_info[] = {
		/* MME X high coordinate is N/A, hence using only low coordinates */
		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
};
416 
/* Cause-bit indices of the PMMU fatal-error event */
enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN = 0,	/* read-latency output FIFO overrun */
	LATENCY_WR_OUT_FIFO_OVERRUN = 1,	/* write-latency output FIFO overrun */
};
421 
/* Cause-bit indices of the PCIe AXI-drain indication event */
enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND = 0,	/* low-bandwidth path drain indication */
	HBW_AXI_DRAIN_IND = 1	/* high-bandwidth path drain indication */
};
426 
/*
 * Per-HBM-cluster HIF/HMMU enable mask. Each entry is 16 bits wide (cf.
 * GAUDI2_HIF_HMMU_FULL_MASK) with exactly two bits cleared — presumably the
 * HIF units tied to that HBM cluster; confirm against the HW spec.
 */
static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
	[HBM_ID0] = 0xFFFC,
	[HBM_ID1] = 0xFFCF,
	[HBM_ID2] = 0xF7F7,
	[HBM_ID3] = 0x7F7F,
	[HBM_ID4] = 0xFCFF,
	[HBM_ID5] = 0xCFFF,
};
435 
/*
 * Maps each XBAR-edge instance (indices 0-3) to its HBM cluster.
 * NOTE(review): the array is sized by EDMA_ID_SIZE although only indices 0-3
 * are initialized; the remaining entries are implicitly zero.
 */
static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};
442 
/* Maps each EDMA instance to the HBM cluster it is associated with */
static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};
453 
454 static const int gaudi2_qman_async_event_id[] = {
455 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
456 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
457 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
458 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
459 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
460 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
461 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
462 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
463 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
464 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
465 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
466 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
467 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
468 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
469 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
470 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
471 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
472 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
473 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
474 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
475 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
476 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
477 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
478 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
479 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
480 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
481 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
482 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
483 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
484 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
485 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
486 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
487 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
488 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
489 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
490 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
491 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
492 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
493 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
494 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
495 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
496 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
497 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
498 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
499 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
500 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
501 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
502 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
503 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
504 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
505 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
506 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
507 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
508 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
509 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
510 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
511 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
512 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
513 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
514 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
515 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
516 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
517 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
518 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
519 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
520 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
521 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
522 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
523 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
524 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
525 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
526 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
527 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
528 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
529 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
530 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
531 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
532 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
533 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
534 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
535 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
536 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
537 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
538 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
539 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
540 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
541 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
542 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
543 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
544 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
545 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
546 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
547 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
548 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
549 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
550 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
551 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
552 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
553 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
554 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
555 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
556 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
557 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
558 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
559 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
560 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
561 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
562 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
563 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
564 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
565 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
566 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
567 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
568 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
569 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
570 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
571 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
572 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
573 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
574 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
575 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
576 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
577 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
578 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
579 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
580 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
581 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
582 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
583 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
584 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
585 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
586 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
587 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
588 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
589 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
590 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
591 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
592 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
593 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
594 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
595 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
596 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
597 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
598 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
599 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
600 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
601 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
602 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
603 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
604 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
605 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
606 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
607 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
608 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
609 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
610 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
611 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
612 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
613 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
614 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
615 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
616 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
617 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
618 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
619 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
620 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
621 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
622 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
623 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
624 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
625 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
626 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
627 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
628 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
629 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
630 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
631 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
632 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
633 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
634 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
635 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
636 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
637 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
638 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
639 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
640 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
641 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
642 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
643 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
644 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
645 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
646 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
647 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
648 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
649 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
650 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
651 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
652 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
653 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
654 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
655 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
656 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
657 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
658 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
659 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
660 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
661 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
662 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
663 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
664 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
665 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
666 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
667 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
668 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
669 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
670 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
671 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
672 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
673 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
674 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
675 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
676 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
677 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
678 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
679 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
680 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
681 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
682 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
683 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
684 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
685 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
686 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
687 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
688 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
689 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
690 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
691 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
692 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
693 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
694 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
695 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
696 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
697 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
698 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
699 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
700 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
701 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
702 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
703 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
704 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
705 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
706 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
707 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
710 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
711 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
713 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
714 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
715 };
716 
/*
 * Per-DMA-core async event ID: maps each DMA core (EDMA0-7, PDMA0-1, KDMA)
 * to the event reported when that core raises a core error.
 * Indexed by the DMA_CORE_ID_* enum.
 */
static const int gaudi2_dma_core_async_event_id[] = {
	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
};
730 
/* Human-readable names for the QM SEI error causes, indexed by cause bit. */
static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
	"qman sei intr",
	"arc sei intr"
};
735 
/* Human-readable names for the CPU SEI error causes, indexed by cause bit. */
static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
	"AXI_TERMINATOR WR",
	"AXI_TERMINATOR RD",
	"AXI SPLIT SEI Status"
};
741 
/* Human-readable names for the ARC SEI error causes, indexed by cause bit. */
static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
	"cbu_bresp_sei_intr_cause",
	"cbu_rresp_sei_intr_cause",
	"lbu_bresp_sei_intr_cause",
	"lbu_rresp_sei_intr_cause",
	"cbu_axi_split_intr_cause",
	"lbu_axi_split_intr_cause",
	"arc_ip_excptn_sei_intr_cause",
	"dmi_bresp_sei_intr_cause",
	"aux2apb_err_sei_intr_cause",
	"cfg_lbw_wr_terminated_intr_cause",
	"cfg_lbw_rd_terminated_intr_cause",
	"cfg_dccm_wr_terminated_intr_cause",
	"cfg_dccm_rd_terminated_intr_cause",
	"cfg_hbw_rd_terminated_intr_cause"
};
758 
/* Human-readable names for the video-decoder error causes, indexed by cause bit. */
static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
	"msix_vcd_hbw_sei",
	"msix_l2c_hbw_sei",
	"msix_nrm_hbw_sei",
	"msix_abnrm_hbw_sei",
	"msix_vcd_lbw_sei",
	"msix_l2c_lbw_sei",
	"msix_nrm_lbw_sei",
	"msix_abnrm_lbw_sei",
	"apb_vcd_lbw_sei",
	"apb_l2c_lbw_sei",
	"apb_nrm_lbw_sei",
	"apb_abnrm_lbw_sei",
	"dec_sei",
	"dec_apb_sei",
	"trc_apb_sei",
	"lbw_mstr_if_sei",
	"axi_split_bresp_err_sei",
	"hbw_axi_wr_viol_sei",
	"hbw_axi_rd_viol_sei",
	"lbw_axi_wr_viol_sei",
	"lbw_axi_rd_viol_sei",
	"vcd_spi",
	"l2c_spi",
	"nrm_spi",
	"abnrm_spi",
};
786 
/*
 * Human-readable names for QMAN (upper queue manager) error causes.
 * NOTE(review): index presumably matches the bit position in the QM
 * error-cause register - confirm against the register spec.
 */
static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"PQC L2H error"
};
807 
/*
 * Human-readable names for lower-QMAN error causes. "RSVD*" entries mark
 * reserved bits that carry no defined cause; the remaining names extend the
 * upper-QM list with ARC-related causes.
 */
static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
	"RSVD0",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped",
	"CPDMA Up overflow",
	"RSVD17",
	"CQ_WR_IFIFO_CI_ERR",
	"CQ_WR_CTL_CI_ERR",
	"ARC_CQF_RD_ERR",
	"ARC_CQ_WR_IFIFO_CI_ERR",
	"ARC_CQ_WR_CTL_CI_ERR",
	"ARC_AXI_ERR",
	"CP_SWITCH_WDT_ERR"
};
835 
/* Human-readable names for QMAN arbiter error causes, indexed by cause bit. */
static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
841 
/*
 * Human-readable names for rotator error causes, indexed by cause bit.
 * NOTE(review): "guadi2" is a typo for "gaudi2"; kept as-is because the
 * identifier is referenced elsewhere in this file.
 */
static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
	"qm_axi_err",
	"qm_trace_fence_events",
	"qm_sw_err",
	"qm_cp_sw_stop",
	"lbw_mstr_rresp_err",
	"lbw_mstr_bresp_err",
	"lbw_msg_slverr",
	"hbw_msg_slverr",
	"wbc_slverr",
	"hbw_mstr_rresp_err",
	"hbw_mstr_bresp_err",
	"sb_resp_intr",
	"mrsb_resp_intr",
	"core_dw_status_0",
	"core_dw_status_1",
	"core_dw_status_2",
	"core_dw_status_3",
	"core_dw_status_4",
	"core_dw_status_5",
	"core_dw_status_6",
	"core_dw_status_7",
	"async_arc2cpu_sei_intr",
};
866 
/*
 * Human-readable names for TPC interrupt causes, indexed by cause bit.
 * Covers scalar (SPU) and vector (VPU) arithmetic events, AXI response
 * errors and lock/protection violations.
 */
static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fma_fp_dst_nan",
	"tpc_spu_fma_fp_dst_inf",
	"tpc_spu_convert_fp_dst_nan",
	"tpc_spu_convert_fp_dst_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_convert_fp_dst_nan",
	"tpc_vpu_convert_fp_dst_inf",
	"tpc_vpu_fma_fp_dst_nan",
	"tpc_vpu_fma_fp_dst_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err",
	"st_unlock_already_locked",
	"invalid_lock_access",
	"LD_L protection violation",
	"ST_L protection violation",
	"D$ L0CS mismatch",
};
900 
/*
 * Human-readable names for MME error causes, indexed by cause bit.
 * NOTE(review): "guadi2" is a typo for "gaudi2"; kept as-is because the
 * identifier is referenced elsewhere in this file.
 */
static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
	"agu_resp_intr",
	"qman_axi_err",
	"wap sei (wbc axi err)",
	"arc sei",
	"cfg access error",
	"qm_sw_err",
	"sbte_dbg_intr_0",
	"sbte_dbg_intr_1",
	"sbte_dbg_intr_2",
	"sbte_dbg_intr_3",
	"sbte_dbg_intr_4",
	"sbte_prtn_intr_0",
	"sbte_prtn_intr_1",
	"sbte_prtn_intr_2",
	"sbte_prtn_intr_3",
	"sbte_prtn_intr_4",
};
919 
/*
 * Human-readable names for MME WAP (write-back/accumulator path) error
 * causes, indexed by cause bit.
 * NOTE(review): "guadi2" is a typo for "gaudi2"; kept as-is because the
 * identifier is referenced elsewhere in this file.
 */
static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
	"WBC ERR RESP_0",
	"WBC ERR RESP_1",
	"AP SOURCE POS INF",
	"AP SOURCE NEG INF",
	"AP SOURCE NAN",
	"AP RESULT POS INF",
	"AP RESULT NEG INF",
};
929 
/* Human-readable names for DMA-core interrupt causes, indexed by cause bit. */
static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW Read returned with error RRESP",
	"HBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};
940 
/*
 * KDMA variant of the DMA-core interrupt cause names; same size and bit
 * layout as gaudi2_dma_core_interrupts_cause, but the first two causes
 * cover both HBW and LBW transactions on KDMA.
 */
static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
	"HBW/LBW Read returned with error RRESP",
	"HBW/LBW write returned with error BRESP",
	"LBW write returned with error BRESP",
	"descriptor_fifo_overflow",
	"KDMA SB LBW Read returned with error",
	"KDMA WBC LBW Write returned with error",
	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
	"WRONG CFG FOR COMMIT IN LIN DMA"
};
951 
/* Pairs a sync-manager SEI cause description with the label of the value logged for it. */
struct gaudi2_sm_sei_cause_data {
	const char *cause_name;	/* human-readable description of the SEI cause */
	const char *log_name;	/* label for the value logged alongside the cause */
};
956 
/* Sync-manager SEI causes and the label of the value logged for each, indexed by cause bit. */
static const struct gaudi2_sm_sei_cause_data
gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
	{"calculated SO value overflow/underflow", "SOB ID"},
	{"payload address of monitor is not aligned to 4B", "monitor addr"},
	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
};
963 
/* Human-readable names for PMMU fatal interrupt causes, indexed by cause bit. */
static const char * const
gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};
969 
/* Human-readable names for HIF fatal interrupt causes, indexed by cause bit. */
static const char * const
gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
	"LATENCY_RD_OUT_FIFO_OVERRUN",
	"LATENCY_WR_OUT_FIFO_OVERRUN",
};
975 
/* Human-readable names for PSOC AXI-drain interrupt causes, indexed by cause bit. */
static const char * const
gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
	"AXI drain HBW",
	"AXI drain LBW",
};
981 
/* Human-readable names for PCIe address-decoder error causes, indexed by cause bit. */
static const char * const
gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
	"HBW error response",
	"LBW error response",
	"TLP is blocked by RR"
};
988 
/*
 * Map each hardware queue ID to the engine that owns it. Each engine exposes
 * four consecutive queues (_0.._3), so GNU range designators cover all four
 * entries per engine at once.
 */
static const int gaudi2_queue_id_to_engine_id[] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
	[GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
							GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
							GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
							GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
							GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
							GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
							GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
							GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
							GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
							GAUDI2_DCORE0_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
							GAUDI2_DCORE1_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
							GAUDI2_DCORE2_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
							GAUDI2_DCORE3_ENGINE_ID_MME,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_5,
	/* DCORE0 has a 7th TPC (TPC_6); the other dcores have six each */
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
							GAUDI2_DCORE0_ENGINE_ID_TPC_6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
							GAUDI2_DCORE1_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
							GAUDI2_DCORE2_ENGINE_ID_TPC_5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_0,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_1,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_2,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_3,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_4,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
							GAUDI2_DCORE3_ENGINE_ID_TPC_5,
	/* Each NIC macro exposes two engines (QM0/QM1); NIC queue N maps to engine NIC(N/2)_(N%2) */
	[GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
	[GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
	[GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
	[GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
	[GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
	[GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
	[GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
	[GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
	[GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
	[GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
	[GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
	[GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
	[GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
	[GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
	[GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
	[GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
	[GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
	[GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
	[GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
	[GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
	[GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
	[GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
	[GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
	[GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
	[GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
	[GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
};
1093 
1094 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
1095 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
1096 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
1097 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1098 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1099 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1100 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1101 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1102 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1103 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1104 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1105 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1106 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1107 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1108 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1109 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1110 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1111 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1112 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1113 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1114 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1115 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1116 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1117 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1118 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1119 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1120 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1121 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1122 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1123 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1124 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1125 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1126 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1127 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1128 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1129 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1130 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1131 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1132 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1133 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1134 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1135 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1136 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1137 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1138 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1139 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1140 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1141 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1142 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1143 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1144 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1145 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1146 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1147 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1148 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1149 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1150 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1151 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1152 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1153 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1154 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1155 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1156 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1157 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1158 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1159 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1160 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1161 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1162 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1163 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1164 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1165 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1166 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1167 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1168 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1169 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1170 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1171 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1172 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1173 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1174 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1175 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1176 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1177 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1178 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1179 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1180 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1181 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1182 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1183 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1184 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1185 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1186 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1187 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1188 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1189 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1190 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1191 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1192 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1193 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1194 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1195 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1196 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1197 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1198 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1199 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1200 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1201 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1202 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1203 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1204 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1205 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1206 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1207 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1208 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1209 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1210 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1211 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1212 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1213 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1214 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1215 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1216 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1217 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1218 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1219 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1220 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1221 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1222 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1223 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1224 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1225 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1226 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1227 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1228 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1229 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1230 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1231 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1232 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1233 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1234 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1235 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1236 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1237 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1238 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1239 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1240 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1241 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1242 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1243 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1244 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1245 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1246 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1247 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1248 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1249 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1250 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1251 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1252 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1253 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1254 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1255 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1256 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1257 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1258 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1259 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1260 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1261 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1262 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1263 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1264 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1265 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1266 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1267 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1268 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1269 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1270 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1271 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1272 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1273 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1274 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1275 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1276 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1277 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1278 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1279 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1280 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1281 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1282 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1283 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1284 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1285 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1286 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1287 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1288 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1289 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1290 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1291 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1292 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1293 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1294 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1295 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1296 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1297 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1298 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1299 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1300 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1301 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1302 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1303 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1304 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1305 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1306 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1307 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1308 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1309 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1310 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1311 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1312 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1313 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1314 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1315 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1316 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1317 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1318 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1319 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1320 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1321 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1322 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1323 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1324 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1325 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1326 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1327 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1328 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1329 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1330 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1331 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1332 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1333 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1334 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1335 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1336 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1337 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1338 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1339 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1340 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1341 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1342 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1343 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1344 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1345 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1346 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1347 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1348 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1349 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1350 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1351 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1352 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1353 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1354 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1355 };
1356 
/*
 * Per-ARC-CPU AUX register block base address, indexed by CPU_ID_*.
 * Covers all ARC instances: 6 schedulers (4 in the ARC farm, 2 embedded in
 * the DCORE1/DCORE3 MME QMANs), 25 TPC QMAN ARCs (ARC24 is the extra
 * DCORE0 TPC6), 2 MME, 8 EDMA, 2 PDMA, 2 rotator and 24 NIC QMAN ARCs
 * (two per NIC macro, AUX0/AUX1).
 */
static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
	/* Scheduler ARCs 4/5 live inside the DCORE1/DCORE3 MME QMANs */
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
	/* ARC24 is the 25th TPC (DCORE0 TPC6), kept out of DCORE order */
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
};
1428 
/*
 * Per-ARC-CPU DCCM (data closely-coupled memory) block base address,
 * indexed by CPU_ID_*. Mirrors the CPU layout of gaudi2_arc_blocks_bases:
 * one entry per scheduler/TPC/MME/EDMA/PDMA/rotator/NIC QMAN ARC.
 * NOTE(review): the SCHED_ARC0..3 entries use DCCM0 of the ARC-farm cores;
 * presumably each farm ARC exposes a single DCCM bank here.
 */
static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
	/* Scheduler ARCs 4/5 live inside the DCORE1/DCORE3 MME QMANs */
	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
	/* ARC24 is the 25th TPC (DCORE0 TPC6), kept out of DCORE order */
	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
};
1500 
/* MME CTRL_LO register block base address per DCORE MME, indexed by MME_ID_*.
 * Non-static: shared with other Gaudi2 driver translation units.
 */
const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
};
1507 
/*
 * Maps every HW queue ID to the ARC CPU that owns its queue manager.
 * Each QMAN exposes four queues (_0.._3), so entries come in groups of
 * four sharing one CPU_ID_*. The DCORE1/DCORE3 MME queues map to
 * scheduler ARCs 4/5 (those MME QMANs embed the scheduler cores), while
 * DCORE0/DCORE2 MME queues map to dedicated MME QMAN ARCs.
 */
static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
	/* DCORE0 TPC6 (the 25th TPC) is served by TPC ARC24 */
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
	/* DCORE1 MME QMAN embeds scheduler ARC4 */
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
	/* DCORE3 MME QMAN embeds scheduler ARC5 */
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
};
1770 
/* DMA core register block base address per DMA engine, indexed by
 * DMA_CORE_ID_*: 2 PDMAs, 8 EDMAs (two per DCORE) and the ARC-farm KDMA.
 * Non-static: shared with other Gaudi2 driver translation units.
 */
const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
};
1784 
/* MME accumulator (ACC) register block base address per DCORE MME,
 * indexed by MME_ID_*. Non-static: shared with other translation units.
 */
const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
};
1791 
/* TPC CFG register block base address per TPC, indexed by TPC_ID_*.
 * 24 TPCs across the four DCOREs plus the extra DCORE0 TPC6 (listed last,
 * matching the TPC_ID_* enum order).
 */
static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
	/* The 25th TPC (DCORE0 TPC6) comes last in the TPC_ID_* enum */
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
};
1819 
/* TPC EML_CFG register block base address per TPC, indexed by TPC_ID_*.
 * Same TPC layout/ordering as gaudi2_tpc_cfg_blocks_bases.
 */
static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
	/* The 25th TPC (DCORE0 TPC6) comes last in the TPC_ID_* enum */
	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
};
1847 
/* Base address of each rotator engine's register block, indexed by rotator ID */
const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = mmROT0_BASE,
	[ROTATOR_ID_1] = mmROT1_BASE
};
1852 
/* Maps each TPC ID to its first HW queue ID (the _0 stream of that TPC) */
static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
};
1880 
/* Maps each rotator ID to its first HW queue ID (the _0 stream of that rotator) */
static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
};
1885 
/* Maps a GAUDI2_*_ENGINE_ID_TPC_* engine ID to the corresponding TPC ID */
static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped liked HW) */
	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
};
1914 
/* Maps a GAUDI2_*_ENGINE_ID_MME engine ID to the corresponding MME ID */
static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
};
1921 
/* Maps a DMA engine ID to its DMA core ID.
 * NOTE(review): despite the "edma" name, this table also covers the PDMA and
 * KDMA engines - all DMA_CORE_ID_* values.
 */
static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
};
1935 
/* First queue ID (the _0 stream) of every EDMA engine, ordered DCORE0..DCORE3 */
const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
};
1946 
/* Names for the video-decoder MSI-X interrupts; each decoder instance has a
 * regular entry and an "abnormal" (error) entry, interleaved in this order.
 */
static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
};
1959 
/* Router (RTR) instance IDs: eight routers per DCORE, 32 in total.
 * Used below to describe which router each HW initiator goes through.
 */
enum rtr_id {
	DCORE0_RTR0,
	DCORE0_RTR1,
	DCORE0_RTR2,
	DCORE0_RTR3,
	DCORE0_RTR4,
	DCORE0_RTR5,
	DCORE0_RTR6,
	DCORE0_RTR7,
	DCORE1_RTR0,
	DCORE1_RTR1,
	DCORE1_RTR2,
	DCORE1_RTR3,
	DCORE1_RTR4,
	DCORE1_RTR5,
	DCORE1_RTR6,
	DCORE1_RTR7,
	DCORE2_RTR0,
	DCORE2_RTR1,
	DCORE2_RTR2,
	DCORE2_RTR3,
	DCORE2_RTR4,
	DCORE2_RTR5,
	DCORE2_RTR6,
	DCORE2_RTR7,
	DCORE3_RTR0,
	DCORE3_RTR1,
	DCORE3_RTR2,
	DCORE3_RTR3,
	DCORE3_RTR4,
	DCORE3_RTR5,
	DCORE3_RTR6,
	DCORE3_RTR7,
};
1994 
/* HBW router used by each TPC initiator, indexed by TPC sequence number.
 * The extra (+1) last entry is for the PCI TPC (DCORE0_TPC6).
 */
static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
	DCORE0_RTR0
};
2002 
/* LBW router used by each TPC initiator, indexed by TPC sequence number.
 * The extra (+1) last entry is for the PCI TPC (DCORE0_TPC6).
 */
static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
	DCORE0_RTR0
};
2010 
/* HBW router used by each video-decoder initiator, indexed by decoder number */
static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};
2015 
/* LBW router used by each video-decoder initiator, indexed by decoder number */
static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
};
2020 
/* HBW router used by each NIC macro initiator, indexed by NIC macro number */
static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
2025 
/* LBW router used by each NIC macro initiator (same mapping as the HBW table) */
static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
2030 
/* HBW SFT (shared fabric) master-interface base address for each EDMA engine,
 * indexed like edma_stream_base (DCORE0..DCORE3, two engines each).
 */
static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
};
2041 
/* HBW router used by each PDMA initiator */
static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR0, DCORE0_RTR0
};
2045 
/* LBW router used by each PDMA initiator */
static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR2, DCORE0_RTR2
};
2049 
/* HBW router used by each rotator initiator */
static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR0, DCORE3_RTR7
};
2053 
/* LBW router used by each rotator initiator */
static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR2, DCORE3_RTR5
};
2057 
/* Per-MME set of router IDs, one field per MME initiator (see enum
 * mme_initiators for the matching initiator list).
 */
struct mme_initiators_rtr_id {
	u32 wap0;
	u32 wap1;
	u32 write;
	u32 read;
	u32 sbte0;
	u32 sbte1;
	u32 sbte2;
	u32 sbte3;
	u32 sbte4;
};
2069 
/* The distinct initiators inside a single MME engine; order matches the
 * fields of struct mme_initiators_rtr_id.
 */
enum mme_initiators {
	MME_WAP0 = 0,
	MME_WAP1,
	MME_WRITE,
	MME_READ,
	MME_SBTE0,
	MME_SBTE1,
	MME_SBTE2,
	MME_SBTE3,
	MME_SBTE4,
	MME_INITIATORS_MAX
};
2082 
/* Router ID for every initiator of each DCORE's MME, one entry per MME.
 * NOTE(review): the numeric values appear to be raw enum rtr_id indices
 * (0-31) - confirm against the users of this table.
 */
static const struct mme_initiators_rtr_id
gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
};
2094 
/* HW module families that can be the source of a RAZWI event */
enum razwi_event_sources {
	RAZWI_TPC,
	RAZWI_MME,
	RAZWI_EDMA,
	RAZWI_PDMA,
	RAZWI_NIC,
	RAZWI_DEC,
	RAZWI_ROT,
	RAZWI_ARC_FARM
};
2105 
/* An HBM memory-controller error cause: interrupt mask bit plus a
 * human-readable description for logging.
 */
struct hbm_mc_error_causes {
	u32 mask;
	char cause[50];
};
2110 
/* Table of the device's special HW blocks, expanded from GAUDI2_SPECIAL_BLOCKS */
static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2112 
2113 /* Special blocks iterator is currently used to configure security protection bits,
2114  * and read global errors. Most HW blocks are addressable and those who aren't (N/A)-
2115  * must be skipped. Following configurations are commonly used for both PB config
2116  * and global error reading, since currently they both share the same settings.
2117  * Once it changes, we must remember to use separate configurations for either one.
2118  */
/* Block types the special-blocks iterator must skip (see comment above) */
static int gaudi2_iterator_skip_block_types[] = {
		GAUDI2_BLOCK_TYPE_PLL,
		GAUDI2_BLOCK_TYPE_EU_BIST,
		GAUDI2_BLOCK_TYPE_HBM,
		GAUDI2_BLOCK_TYPE_XFT
};
2125 
/* Address ranges the special-blocks iterator must skip, in addition to the
 * block types above.
 */
static struct range gaudi2_iterator_skip_block_ranges[] = {
		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
		/* Skip all CPU blocks except for CPU_IF */
		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
};
2134 
/* HBM memory-controller SPI interrupt causes and their log descriptions.
 * NOTE(review): could be const if no code path writes to it - verify.
 */
static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
};
2142 
/* Log description for each HBM SEI (system error interrupt) cause */
static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
	[HBM_SEI_READ_ERR] = "SEI read data error",
	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
	[HBM_SEI_DFI] = "SEI DFI error",
	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
};
2154 
/* An MMU SPI/SEI interrupt cause: log description plus the INTERRUPT_CLR
 * register bit that clears it (-1 when there is no clear bit).
 */
struct mmu_spi_sei_cause {
	char cause[50];
	int clear_bit;
};
2159 
/* MMU SPI/SEI interrupt causes, indexed by cause bit number */
static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
	{"page fault", 1},		/* INTERRUPT_CLR[1] */
	{"page access", 1},		/* INTERRUPT_CLR[1] */
	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
	{"mmu rei0", -1},		/* no clear register bit */
	{"mmu rei1", -1},		/* no clear register bit */
	{"stlb rei0", -1},		/* no clear register bit */
	{"stlb rei1", -1},		/* no clear register bit */
	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
	{"slave error", 16},		/* INTERRUPT_CLR[16] */
	{"dec error", 17},		/* INTERRUPT_CLR[17] */
	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
};
2181 
/* Parameters describing a single MMU cache invalidation request:
 * the VA range (when range_invalidation is set), the value to write to
 * start the invalidation, and the caller-supplied flags.
 */
struct gaudi2_cache_invld_params {
	u64 start_va;
	u64 end_va;
	u32 inv_start_val;
	u32 flags;
	bool range_invalidation;
};
2189 
/* Context passed through the TPC iterator when collecting idle status:
 * output buffer, per-engine idle mask, aggregate idle flag and the printf
 * format used for each TPC's status line.
 */
struct gaudi2_tpc_idle_data {
	struct engines_data *e;
	unsigned long *mask;
	bool *is_idle;
	const char *tpc_fmt;
};
2196 
/* Context passed through the TPC iterator when configuring the TPC MMU:
 * the read/write ASID value to program.
 */
struct gaudi2_tpc_mmu_data {
	u32 rw_asid;
};
2200 
/* State-dump spec properties, zero-initialized here and filled in at runtime */
static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2202 
/* Forward declarations for static helpers defined later in this file */
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
										bool is_memset);
static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2219 
/* Intentionally empty ASIC callback.
 * NOTE(review): HBM scrambler initialization is presumably handled elsewhere
 * (e.g. by firmware) on Gaudi2 - confirm before adding logic here.
 */
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{

}
2224 
/* Return the CB size needed for a signal operation: one short message packet */
static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short);
}
2229 
gaudi2_get_wait_cb_size(struct hl_device * hdev)2230 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2231 {
2232 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2233 }
2234 
gaudi2_iterate_tpcs(struct hl_device * hdev,struct iterate_module_ctx * ctx)2235 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2236 {
2237 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2238 	int dcore, inst, tpc_seq;
2239 	u32 offset;
2240 
2241 	/* init the return code */
2242 	ctx->rc = 0;
2243 
2244 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2245 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2246 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2247 
2248 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2249 				continue;
2250 
2251 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2252 
2253 			ctx->fn(hdev, dcore, inst, offset, ctx);
2254 			if (ctx->rc) {
2255 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2256 							dcore, inst);
2257 				return;
2258 			}
2259 		}
2260 	}
2261 
2262 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2263 		return;
2264 
2265 	/* special check for PCI TPC (DCORE0_TPC6) */
2266 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2267 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2268 	if (ctx->rc)
2269 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2270 }
2271 
gaudi2_host_phys_addr_valid(u64 addr)2272 static bool gaudi2_host_phys_addr_valid(u64 addr)
2273 {
2274 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2275 		return true;
2276 
2277 	return false;
2278 }
2279 
set_number_of_functional_hbms(struct hl_device * hdev)2280 static int set_number_of_functional_hbms(struct hl_device *hdev)
2281 {
2282 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2283 	u8 faulty_hbms = hweight64(hdev->dram_binning);
2284 
2285 	/* check if all HBMs should be used */
2286 	if (!faulty_hbms) {
2287 		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
2288 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2289 		return 0;
2290 	}
2291 
2292 	/*
2293 	 * check for error condition in which number of binning
2294 	 * candidates is higher than the maximum supported by the
2295 	 * driver (in which case binning mask shall be ignored and driver will
2296 	 * set the default)
2297 	 */
2298 	if (faulty_hbms > MAX_FAULTY_HBMS) {
2299 		dev_err(hdev->dev,
2300 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2301 			MAX_FAULTY_HBMS, hdev->dram_binning);
2302 		return -EINVAL;
2303 	}
2304 
2305 	/*
2306 	 * by default, number of functional HBMs in Gaudi2 is always
2307 	 * GAUDI2_HBM_NUM - 1.
2308 	 */
2309 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2310 	return 0;
2311 }
2312 
gaudi2_is_edma_queue_id(u32 queue_id)2313 static bool gaudi2_is_edma_queue_id(u32 queue_id)
2314 {
2315 
2316 	switch (queue_id) {
2317 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
2318 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
2319 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
2320 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
2321 		return true;
2322 	default:
2323 		return false;
2324 	}
2325 }
2326 
gaudi2_set_dram_properties(struct hl_device * hdev)2327 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2328 {
2329 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2330 	u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
2331 	u32 basic_hbm_page_size, edma_idx = 0;
2332 	int rc, i;
2333 
2334 	rc = set_number_of_functional_hbms(hdev);
2335 	if (rc)
2336 		return -EINVAL;
2337 
2338 	/*
2339 	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
2340 	 * in which we are using x16 bigger page size to be able to populate the entire
2341 	 * HBM mappings in the TLB
2342 	 */
2343 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2344 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2345 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2346 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2347 	prop->dram_base_address = DRAM_PHYS_BASE;
2348 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2349 	prop->dram_supports_virtual_memory = true;
2350 
2351 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2352 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2353 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2354 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2355 
2356 	/* since DRAM page size differs from DMMU page size we need to allocate
2357 	 * DRAM memory in units of dram_page size and mapping this memory in
2358 	 * units of DMMU page size. we overcome this size mismatch using a
2359 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
2360 	 * page.
2361 	 * We therefore:
2362 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
2363 	 *    (suppose we get n such pages)
2364 	 * 2. limit the amount of virtual address space we got from 1 above to
2365 	 *    a multiple of 64M as we don't want the scrambled address to cross
2366 	 *    the DRAM virtual address space.
2367 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
2368 	 * 3. determine the and address accordingly
2369 	 *    end_addr = start_addr + m * 48M
2370 	 *
2371 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
2372 	 */
2373 	prop->dmmu.start_addr = prop->dram_base_address +
2374 			(prop->dram_page_size *
2375 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2376 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2377 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2378 	/*
2379 	 * Driver can't share an (48MB) HBM page with the F/W in order to prevent FW to block
2380 	 * the driver part by range register, so it must start at the next (48MB) page
2381 	 */
2382 	hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
2383 
2384 	/*
2385 	 * The NIC driver section size and the HMMU page tables section in the HBM needs
2386 	 * to be the remaining size in the first dram page after taking into
2387 	 * account the F/W image size
2388 	 */
2389 
2390 	/* Reserve region in HBM for HMMU page tables */
2391 	prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
2392 				((prop->dram_page_size - hbm_drv_base_offset) -
2393 				(HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
2394 
2395 	/* Set EDMA PQs HBM addresses */
2396 	edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
2397 
2398 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2399 		if (gaudi2_is_edma_queue_id(i)) {
2400 			prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
2401 							(edma_idx * HL_QUEUE_SIZE_IN_BYTES);
2402 			edma_idx++;
2403 		}
2404 	}
2405 
2406 	return 0;
2407 }
2408 
/*
 * gaudi2_set_fixed_properties() - fill in all compile-time-known ASIC properties.
 * @hdev: habanalabs device structure.
 *
 * Allocates and initializes the HW queue properties array, then sets the
 * memory map, MMU (device and host) configuration, sync-stream resources and
 * the remaining fixed device properties. Called early in device init, before
 * F/W handshake and HW init.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error from
 * set_dram_properties().
 */
static int gaudi2_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hw_queue_properties *q_props;
	u32 num_sync_stream_queues = 0;
	int i, rc;

	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
					GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	q_props = prop->hw_queues_props;

	/* All queues below the CPU PQ are HW queues; NIC queues do not support
	 * sync stream, every other HW queue does (and reserves SOBs/MONs below).
	 */
	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
		q_props[i].type = QUEUE_TYPE_HW;
		q_props[i].driver_only = 0;

		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
			q_props[i].supports_sync_stream = 0;
		} else {
			q_props[i].supports_sync_stream = 1;
			num_sync_stream_queues++;
		}

		q_props[i].cb_alloc_flags = CB_ALLOC_USER;

		/* EDMA queues get their queue descriptors in DRAM */
		if (gaudi2_is_edma_queue_id(i))
			q_props[i].dram_bd = 1;
	}

	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
	prop->host_base_address = HOST_PHYS_BASE_0;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
	prop->user_dec_intr_count = NUMBER_OF_DEC;
	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
	prop->completion_mode = HL_COMPLETION_MODE_CS;
	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->hints_range_reservation = true;

	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;

	prop->max_asid = 2;

	/* Device-MMU (HMMU) configuration: 4 hops, 1GB pages, tables in HBM */
	prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;

	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
	prop->dmmu.page_size = PAGE_SIZE_1GB;
	prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
	prop->dmmu.last_mask = LAST_MASK;
	prop->dmmu.host_resident = 0;
	prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;

	/* As we need to set the pgt address in dram for HMMU init so we cannot
	 * wait to the fw cpucp info to set the dram props as mmu init comes before
	 * hw init
	 */
	rc = hdev->asic_funcs->set_dram_properties(hdev);
	if (rc)
		goto free_qprops;

	prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;

	/* Host-MMU (PMMU) configuration: 6 hops, host-resident tables */
	prop->pmmu.pgt_size = prop->mmu_pgt_size;
	hdev->pmmu_huge_range = true;
	prop->pmmu.host_resident = 1;
	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;

	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
	prop->hints_host_hpage_reserved_va_range.start_addr =
			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
	prop->hints_host_hpage_reserved_va_range.end_addr =
			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;

	/* PMMU page size tracks the kernel's base page size (64K vs 4K builds) */
	if (PAGE_SIZE == SZ_64K) {
		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
		prop->pmmu.page_size = PAGE_SIZE_64KB;

		/* shifts and masks are the same in PMMU and HPMMU */
		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
	} else {
		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
		prop->pmmu.page_size = PAGE_SIZE_4KB;

		/* shifts and masks are the same in PMMU and HPMMU */
		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
	}

	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
	prop->num_engine_cores = CPU_ID_MAX;
	prop->cfg_size = CFG_SIZE;
	prop->num_of_events = GAUDI2_EVENT_SIZE;

	prop->supports_engine_modes = true;

	prop->dc_power_default = DC_POWER_DEFAULT;

	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);

	prop->mme_master_slave_mode = 1;

	/* Reserve SOBs/monitors for the sync-stream-capable queues counted above */
	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
					(num_sync_stream_queues * HL_RSVD_SOBS);

	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
					(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;

	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;

	/* Updated later, once the F/W boot status registers are read */
	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->max_dec = NUMBER_OF_DEC;

	prop->clk_pll_index = HL_GAUDI2_MME_PLL;

	prop->dma_mask = 64;

	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;

	prop->supports_advanced_cpucp_rc = true;

	return 0;

free_qprops:
	kfree(prop->hw_queues_props);
	return rc;
}
2612 
gaudi2_pci_bars_map(struct hl_device * hdev)2613 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2614 {
2615 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2616 	bool is_wc[3] = {false, false, true};
2617 	int rc;
2618 
2619 	rc = hl_pci_bars_map(hdev, name, is_wc);
2620 	if (rc)
2621 		return rc;
2622 
2623 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2624 
2625 	return 0;
2626 }
2627 
static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct hl_inbound_pci_region region;
	u64 prev_addr = addr;
	int rc;

	/* Nothing to do if the DRAM BAR already points at the requested address */
	if (gaudi2 && gaudi2->dram_bar_cur_addr == addr)
		return prev_addr;

	/* When the F/W configured the iATU, the driver must not modify it */
	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to DRAM */
	region.mode = PCI_BAR_MATCH_MODE;
	region.bar = DRAM_BAR_ID;
	region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &region);
	if (rc)
		return U64_MAX;

	/* Cache the new mapping and report the address it replaced */
	if (gaudi2) {
		prev_addr = gaudi2->dram_bar_cur_addr;
		gaudi2->dram_bar_cur_addr = addr;
	}

	return prev_addr;
}
2656 
/*
 * gaudi2_init_iatu() - configure the PCIe iATU inbound/outbound regions.
 *
 * Skipped entirely when the F/W has already configured the iATU. Otherwise
 * maps: inbound region 0 (BAR0) to CFG, inbound region 1 (BAR0) to
 * BAR0-reserved + SRAM, inbound region 2 (BAR4) to DRAM, and outbound
 * region 0 to host memory.
 *
 * Return: 0 on success, error code propagated from the PCI helpers otherwise.
 */
static int gaudi2_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	u32 bar_addr_low, bar_addr_high;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
	 * We must map this region in BAR match mode in order to
	 * fetch BAR physical base address
	 */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	/* Base address must be aligned to Bar size which is 256 MB */
	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		return rc;

	/* Fetch physical BAR address via the temporary BAR-match mapping.
	 * Low DWORD has the BAR flags in bits [3:0], hence the ~0xF mask.
	 */
	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;

	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;

	/* Inbound Region 0 - Bar 0 - Point to CFG (final, address-match mode) */
	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.offset_in_bar = 0;
	inbound_region.addr = STM_FLASH_BASE_ADDR;
	inbound_region.size = CFG_REGION_SIZE;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.offset_in_bar = CFG_REGION_SIZE;
	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		return rc;

	/* Inbound Region 2 - Bar 4 - Point to DRAM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DRAM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		return rc;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE_0;
	outbound_region.size = HOST_PHYS_SIZE_0;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

	return rc;
}
2720 
/* Read the current device H/W state from the dedicated H/W state register */
static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
2725 
gaudi2_tpc_binning_init_prop(struct hl_device * hdev)2726 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2727 {
2728 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2729 
2730 	/*
2731 	 * check for error condition in which number of binning candidates
2732 	 * is higher than the maximum supported by the driver
2733 	 */
2734 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2735 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2736 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2737 					hdev->tpc_binning);
2738 		return -EINVAL;
2739 	}
2740 
2741 	prop->tpc_binning_mask = hdev->tpc_binning;
2742 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2743 
2744 	return 0;
2745 }
2746 
gaudi2_set_tpc_binning_masks(struct hl_device * hdev)2747 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2748 {
2749 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2750 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2751 	u64 tpc_binning_mask;
2752 	u8 subst_idx = 0;
2753 	int i, rc;
2754 
2755 	rc = gaudi2_tpc_binning_init_prop(hdev);
2756 	if (rc)
2757 		return rc;
2758 
2759 	tpc_binning_mask = prop->tpc_binning_mask;
2760 
2761 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2762 		u8 subst_seq, binned, qid_base;
2763 
2764 		if (tpc_binning_mask == 0)
2765 			break;
2766 
2767 		if (subst_idx == 0) {
2768 			subst_seq = TPC_ID_DCORE0_TPC6;
2769 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2770 		} else {
2771 			subst_seq = TPC_ID_DCORE3_TPC5;
2772 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2773 		}
2774 
2775 
2776 		/* clear bit from mask */
2777 		binned = __ffs(tpc_binning_mask);
2778 		/*
2779 		 * Coverity complains about possible out-of-bound access in
2780 		 * clear_bit
2781 		 */
2782 		if (binned >= TPC_ID_SIZE) {
2783 			dev_err(hdev->dev,
2784 				"Invalid binned TPC (binning mask: %llx)\n",
2785 				tpc_binning_mask);
2786 			return -EINVAL;
2787 		}
2788 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2789 
2790 		/* also clear replacing TPC bit from enabled mask */
2791 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2792 
2793 		/* bin substite TPC's Qs */
2794 		q_props[qid_base].binned = 1;
2795 		q_props[qid_base + 1].binned = 1;
2796 		q_props[qid_base + 2].binned = 1;
2797 		q_props[qid_base + 3].binned = 1;
2798 
2799 		subst_idx++;
2800 	}
2801 
2802 	return 0;
2803 }
2804 
gaudi2_set_dec_binning_masks(struct hl_device * hdev)2805 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2806 {
2807 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2808 	u8 num_faulty;
2809 
2810 	num_faulty = hweight32(hdev->decoder_binning);
2811 
2812 	/*
2813 	 * check for error condition in which number of binning candidates
2814 	 * is higher than the maximum supported by the driver
2815 	 */
2816 	if (num_faulty > MAX_FAULTY_DECODERS) {
2817 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2818 						hdev->decoder_binning);
2819 		return -EINVAL;
2820 	}
2821 
2822 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2823 
2824 	if (prop->decoder_binning_mask)
2825 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2826 	else
2827 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2828 
2829 	return 0;
2830 }
2831 
gaudi2_set_dram_binning_masks(struct hl_device * hdev)2832 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2833 {
2834 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2835 
2836 	/* check if we should override default binning */
2837 	if (!hdev->dram_binning) {
2838 		prop->dram_binning_mask = 0;
2839 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2840 		return;
2841 	}
2842 
2843 	/* set DRAM binning constraints */
2844 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2845 	prop->dram_binning_mask = hdev->dram_binning;
2846 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2847 }
2848 
gaudi2_set_edma_binning_masks(struct hl_device * hdev)2849 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2850 {
2851 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2852 	struct hw_queue_properties *q_props;
2853 	u8 seq, num_faulty;
2854 
2855 	num_faulty = hweight32(hdev->edma_binning);
2856 
2857 	/*
2858 	 * check for error condition in which number of binning candidates
2859 	 * is higher than the maximum supported by the driver
2860 	 */
2861 	if (num_faulty > MAX_FAULTY_EDMAS) {
2862 		dev_err(hdev->dev,
2863 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2864 			hdev->edma_binning);
2865 		return -EINVAL;
2866 	}
2867 
2868 	if (!hdev->edma_binning) {
2869 		prop->edma_binning_mask = 0;
2870 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2871 		return 0;
2872 	}
2873 
2874 	seq = __ffs((unsigned long)hdev->edma_binning);
2875 
2876 	/* set binning constraints */
2877 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2878 	prop->edma_binning_mask = hdev->edma_binning;
2879 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2880 
2881 	/* bin substitute EDMA's queue */
2882 	q_props = prop->hw_queues_props;
2883 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2884 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2885 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2886 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2887 
2888 	return 0;
2889 }
2890 
static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u8 binned_seq;

	/* Default: no isolation requested, all XBAR edges enabled */
	if (!xbar_edge_iso_mask) {
		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
		return 0;
	}

	/*
	 * a non-zero mask can only arrive via a cpucp packet (i.e. only the
	 * FW can set a redundancy value). for user it'll always be 0.
	 * Reject more faulty XBAR edges than the driver supports.
	 */
	if (hweight32(xbar_edge_iso_mask) > MAX_FAULTY_XBARS) {
		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
									MAX_FAULTY_XBARS);
		return -EINVAL;
	}

	binned_seq = __ffs((unsigned long)xbar_edge_iso_mask);

	/* An isolated XBAR edge fails its whole HBM cluster */
	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[binned_seq]);
	prop->xbar_edge_enabled_mask = ~xbar_edge_iso_mask & GAUDI2_XBAR_EDGE_FULL_MASK;

	return 0;
}
2926 
static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	/*
	 * mark all clusters as good, each component will "fail" cluster
	 * based on eFuse/user values.
	 * If more than single cluster is faulty- the chip is unusable
	 */
	prop->faulty_dram_cluster_map = 0;

	gaudi2_set_dram_binning_masks(hdev);

	rc = gaudi2_set_edma_binning_masks(hdev);
	if (rc)
		return rc;

	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
	if (rc)
		return rc;

	/* always initially set to full mask */
	prop->hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;

	return 0;
}
2954 
gaudi2_set_cluster_binning_masks(struct hl_device * hdev)2955 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2956 {
2957 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2958 	int rc;
2959 
2960 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2961 	if (rc)
2962 		return rc;
2963 
2964 	/* if we have DRAM binning reported by FW we should perform cluster config  */
2965 	if (prop->faulty_dram_cluster_map) {
2966 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2967 
2968 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2969 	}
2970 
2971 	return 0;
2972 }
2973 
static int gaudi2_set_binning_masks(struct hl_device *hdev)
{
	int rc;

	/* Derive all binning properties; cluster constraints come first */
	rc = gaudi2_set_cluster_binning_masks(hdev);
	if (!rc)
		rc = gaudi2_set_tpc_binning_masks(hdev);
	if (!rc)
		rc = gaudi2_set_dec_binning_masks(hdev);

	return rc;
}
2992 
/*
 * gaudi2_cpucp_info_get() - fetch device info from the CpuCP F/W and apply it.
 *
 * Performs the CpuCP handshake, then updates DRAM size, card name, binning
 * masks, DRAM-derived properties and the max power default according to the
 * values reported by the F/W.
 *
 * Return: 0 on success (or when skipped), negative error code otherwise.
 */
static int gaudi2_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	long max_power;
	u64 dram_size;
	int rc;

	/* CpuCP communication requires the CPU queue to be up */
	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	/* No point of asking this information again when not doing hard reset, as the device
	 * CPU hasn't been reset
	 */
	if (hdev->reset_info.in_compute_reset)
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
										mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
	if (dram_size) {
		/* we can have either 5 or 6 HBMs. other values are invalid */

		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
			dev_err(hdev->dev,
				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
				dram_size, prop->dram_size);
			dram_size = prop->dram_size;
		}

		prop->dram_size = dram_size;
		prop->dram_end_address = prop->dram_base_address + dram_size;
	}

	/* keep the default card name if the F/W did not report one */
	if (!strlen(prop->cpucp_info.card_name))
		strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	/* Overwrite binning masks with the actual binning values from F/W */
	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));

	dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
			hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
			hdev->decoder_binning);

	/*
	 * at this point the DRAM parameters need to be updated according to data obtained
	 * from the FW
	 */
	rc = hdev->asic_funcs->set_dram_properties(hdev);
	if (rc)
		return rc;

	rc = hdev->asic_funcs->set_binning_masks(hdev);
	if (rc)
		return rc;

	max_power = hl_fw_get_max_power(hdev);
	if (max_power < 0)
		return max_power;

	prop->max_power_default = (u64) max_power;

	return 0;
}
3065 
gaudi2_fetch_psoc_frequency(struct hl_device * hdev)3066 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
3067 {
3068 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3069 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
3070 	int rc;
3071 
3072 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3073 		return 0;
3074 
3075 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
3076 	if (rc)
3077 		return rc;
3078 
3079 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
3080 
3081 	return 0;
3082 }
3083 
gaudi2_mmu_clear_pgt_range(struct hl_device * hdev)3084 static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
3085 {
3086 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3087 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3088 	int rc;
3089 
3090 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
3091 		return 0;
3092 
3093 	if (prop->dmmu.host_resident)
3094 		return 0;
3095 
3096 	rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
3097 	if (rc)
3098 		dev_err(hdev->dev, "Failed to clear mmu pgt");
3099 
3100 	return rc;
3101 }
3102 
/*
 * gaudi2_early_init() - early probe-time initialization.
 *
 * Sets fixed ASIC properties, validates PCI BAR sizes, initializes PCI,
 * reads the preboot F/W status and resets the device if its H/W state is
 * dirty. On failure, undoes whatever was done (goto-based cleanup).
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	int rc;

	rc = gaudi2_set_fixed_properties(hdev);
	if (rc)
		return rc;

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
	if (pci_bar_size != MSIX_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);

	/*
	 * Only in pldm driver config iATU
	 */
	if (hdev->pldm)
		hdev->asic_prop.iatu_done_by_fw = false;
	else
		hdev->asic_prop.iatu_done_by_fw = true;

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
3175 
/* Undo gaudi2_early_init(): free queue properties and tear down PCI */
static int gaudi2_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
3183 
gaudi2_is_arc_nic_owned(u64 arc_id)3184 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3185 {
3186 	switch (arc_id) {
3187 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3188 		return true;
3189 	default:
3190 		return false;
3191 	}
3192 }
3193 
gaudi2_is_arc_tpc_owned(u64 arc_id)3194 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3195 {
3196 	switch (arc_id) {
3197 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3198 		return true;
3199 	default:
3200 		return false;
3201 	}
3202 }
3203 
/*
 * gaudi2_init_arcs() - record which ARC CPUs are active in the capability mask.
 *
 * Marks all scheduler ARCs, then every queue-owning ARC whose queue is
 * enabled, skipping NIC ARCs of masked-out ports and TPC ARCs whose TPC
 * H/W capability bit is not set. Finally caches the ARC interrupt-control
 * register address reported by the F/W dynamic registers.
 */
static void gaudi2_init_arcs(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 arc_id;
	u32 i;

	/* All scheduler ARCs are unconditionally marked as enabled */
	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
		if (gaudi2_is_arc_enabled(hdev, i))
			continue;

		gaudi2_set_arc_id_cap(hdev, i);
	}

	/* Step by 4 - each engine owns 4 consecutive queue IDs */
	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
		if (!gaudi2_is_queue_enabled(hdev, i))
			continue;

		arc_id = gaudi2_queue_id_to_arc_id[i];
		if (gaudi2_is_arc_enabled(hdev, arc_id))
			continue;

		/* skip NIC ARCs whose port is masked out */
		if (gaudi2_is_arc_nic_owned(arc_id) &&
				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
			continue;

		/* skip TPC ARCs whose TPC H/W capability is not initialized */
		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
			continue;

		gaudi2_set_arc_id_cap(hdev, arc_id);
	}

	/* Fetch ARC scratchpad address */
	hdev->asic_prop.engine_core_interrupt_reg_addr =
		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
}
3241 
/*
 * gaudi2_scrub_arc_dccm() - zero the DCCM memory of a single ARC CPU via KDMA.
 * @hdev: habanalabs device structure.
 * @cpu_id: ARC CPU whose DCCM should be scrubbed.
 *
 * Scheduler ARCs 0-3 own two consecutive DCCM blocks which are scrubbed in
 * one pass. Scheduler ARCs 4-5 and the MME QMAN ARCs have two banks behind
 * the same address window, so the lower bank is scrubbed first, then the
 * window is switched to the upper bank, scrubbed, and switched back. All
 * other ARCs have a single DCCM block.
 *
 * Return: 0 on success, error from the KDMA job otherwise.
 */
static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
{
	u32 reg_base, reg_val;
	int rc;

	switch (cpu_id) {
	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
		/* Each ARC scheduler has 2 consecutive DCCM blocks */
		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
						ARC_DCCM_BLOCK_SIZE * 2, true);
		if (rc)
			return rc;
		break;
	case CPU_ID_SCHED_ARC4:
	case CPU_ID_SCHED_ARC5:
	case CPU_ID_MME_QMAN_ARC0:
	case CPU_ID_MME_QMAN_ARC1:
		reg_base = gaudi2_arc_blocks_bases[cpu_id];

		/* Scrub lower DCCM block */
		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
						ARC_DCCM_BLOCK_SIZE, true);
		if (rc)
			return rc;

		/* Switch to upper DCCM block */
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);

		/* Scrub upper DCCM block */
		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
						ARC_DCCM_BLOCK_SIZE, true);
		if (rc)
			return rc;

		/* Switch to lower DCCM block */
		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
		break;
	default:
		/* All remaining ARCs own a single DCCM block */
		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
						ARC_DCCM_BLOCK_SIZE, true);
		if (rc)
			return rc;
	}

	return 0;
}
3290 
gaudi2_scrub_arcs_dccm(struct hl_device * hdev)3291 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3292 {
3293 	u16 arc_id;
3294 	int rc;
3295 
3296 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3297 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3298 			continue;
3299 
3300 		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3301 		if (rc)
3302 			return rc;
3303 	}
3304 
3305 	return 0;
3306 }
3307 
/*
 * gaudi2_late_init() - late initialization, after the F/W is fully up.
 *
 * Enables F/W PCI access (passing the virtual MSI-X doorbell address),
 * fetches the PSOC frequency, clears the MMU page tables, initializes and
 * scrubs the ARC CPUs, and applies the security configuration. On any
 * failure after PCI access was enabled, it is disabled again.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_late_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
					gaudi2->virt_msix_db_dma_addr);
	if (rc)
		return rc;

	rc = gaudi2_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi2_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	gaudi2_init_arcs(hdev);

	rc = gaudi2_scrub_arcs_dccm(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
		goto disable_pci_access;
	}

	gaudi2_init_security(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}
3347 
/* Undo gaudi2_late_init(): release the hwmon resources */
static void gaudi2_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
3352 
/* Expose the decoder command blocks (8 DCORE + 2 PCIE) to user mapping */
static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
{
	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
	static const u64 dec_cmd_bases[] = {
		mmDCORE0_DEC0_CMD_BASE, mmDCORE0_DEC1_CMD_BASE,
		mmDCORE1_DEC0_CMD_BASE, mmDCORE1_DEC1_CMD_BASE,
		mmDCORE2_DEC0_CMD_BASE, mmDCORE2_DEC1_CMD_BASE,
		mmDCORE3_DEC0_CMD_BASE, mmDCORE3_DEC1_CMD_BASE,
		mmPCIE_DEC0_CMD_BASE, mmPCIE_DEC1_CMD_BASE,
	};
	u32 i;

	for (i = 0 ; i < ARRAY_SIZE(dec_cmd_bases) ; i++)
		HL_USR_MAPPED_BLK_INIT(&blocks[start_idx + i], dec_cmd_bases[i], HL_BLOCK_SIZE);
}
3368 
/*
 * gaudi2_user_mapped_blocks_init() - build the table of H/W blocks that may
 * be mapped to user-space.
 *
 * Layout of gaudi2->mapped_blocks:
 *   [0 .. NUM_ARC_CPUS)                    - ARC DCCM blocks
 *   [NUM_ARC_CPUS .. +NUM_OF_USER_ACP_BLOCKS) - ARC-farm / MME ACP engines
 *   then NIC UMR blocks, decoder CMD blocks and per-DCORE sync manager blocks.
 */
static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
	u32 block_size, umr_start_idx, num_umr_blocks;
	int i;

	/* ARC DCCM blocks; scheduler ARCs 0-3 own two consecutive blocks */
	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
			block_size = ARC_DCCM_BLOCK_SIZE * 2;
		else
			block_size = ARC_DCCM_BLOCK_SIZE;

		blocks[i].address = gaudi2_arc_dccm_bases[i];
		blocks[i].size = block_size;
	}

	/* ACP engine blocks: 4 ARC-farm ARCs followed by 4 DCORE MME QMAN ARCs */
	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;

	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;

	/* NIC UMR doorbell blocks, addressed by (macro, QM-in-macro, UMR) */
	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
	for (i = 0 ; i < num_umr_blocks ; i++) {
		u8 nic_id, umr_block_id;

		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;

		blocks[umr_start_idx + i].address =
			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
			umr_block_id * NIC_UMR_OFFSET;
		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
	}

	/* Expose decoder HW configuration block to user */
	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);

	/* Sync manager OBJS/GLBL block pairs for DCOREs 1..3 (DCORE0 excluded) */
	for (i = 1; i < NUM_OF_DCORES; ++i) {
		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;

		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;

		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
	}
}
3440 
/*
 * gaudi2_alloc_cpu_accessible_dma_mem() - allocate DMA memory the device ARC
 * can access through its single 32-bit window.
 *
 * Retries the allocation up to GAUDI2_ALLOC_CPU_MEM_RETRY_CNT times until
 * the whole range falls under one MSB extension value, then frees all the
 * earlier unsuitable attempts.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EFAULT if no
 * suitable range was found.
 */
static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/* The device ARC works with 32-bits addresses, and because there is a single HW register
	 * that holds the extension bits (49..28), these bits must be identical in all the allocated
	 * range.
	 */

	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		/* accept the allocation only if start and end share the same MSBs */
		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of ARC accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];

free_dma_mem_arr:
	/* free all earlier attempts; entry i (if valid) is kept for the driver */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
3482 
gaudi2_set_pci_memory_regions(struct hl_device * hdev)3483 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3484 {
3485 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3486 	struct pci_mem_region *region;
3487 
3488 	/* CFG */
3489 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3490 	region->region_base = CFG_BASE;
3491 	region->region_size = CFG_SIZE;
3492 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3493 	region->bar_size = CFG_BAR_SIZE;
3494 	region->bar_id = SRAM_CFG_BAR_ID;
3495 	region->used = 1;
3496 
3497 	/* SRAM */
3498 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3499 	region->region_base = SRAM_BASE_ADDR;
3500 	region->region_size = SRAM_SIZE;
3501 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3502 	region->bar_size = CFG_BAR_SIZE;
3503 	region->bar_id = SRAM_CFG_BAR_ID;
3504 	region->used = 1;
3505 
3506 	/* DRAM */
3507 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3508 	region->region_base = DRAM_PHYS_BASE;
3509 	region->region_size = hdev->asic_prop.dram_size;
3510 	region->offset_in_bar = 0;
3511 	region->bar_size = prop->dram_pci_bar_size;
3512 	region->bar_id = DRAM_BAR_ID;
3513 	region->used = 1;
3514 }
3515 
/*
 * gaudi2_user_interrupt_setup() - initialize all user-visible interrupt
 * structures: TPC assert, unexpected error, common CQ, common decoder,
 * per-engine decoder interrupts and per-user CQ interrupts.
 */
static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i, j, k;

	/* Initialize TPC interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);

	/* Initialize unexpected error interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
						HL_USR_INTERRUPT_UNEXPECTED);

	/* Initialize common user CQ interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);

	/* Initialize common decoder interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);

	/* User interrupts structure holds both decoder and user interrupts from various engines.
	 * We first initialize the decoder interrupts and then we add the user interrupts.
	 * The only limitation is that the last decoder interrupt id must be smaller
	 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
	 */

	/* Initialize decoder interrupts, expose only normal interrupts,
	 * error interrupts to be handled by driver.
	 * i steps by 2 to skip the abnormal (error) interrupt of each decoder.
	 */
	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
										i += 2, j++)
		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
						HL_USR_INTERRUPT_DECODER);

	/* j intentionally continues from the decoder loop - user CQ interrupts
	 * are appended right after the decoder entries in user_interrupt[]
	 */
	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
}
3553 
/* Return a random integer, substituting 1 when the raw value is zero. */
static inline int gaudi2_get_non_zero_random_int(void)
{
	int val = get_random_u32();

	if (!val)
		val = 1;

	return val;
}
3560 
gaudi2_special_blocks_free(struct hl_device * hdev)3561 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3562 {
3563 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3565 			&prop->skip_special_blocks_cfg;
3566 
3567 	kfree(prop->special_blocks);
3568 	kfree(skip_special_blocks_cfg->block_types);
3569 	kfree(skip_special_blocks_cfg->block_ranges);
3570 }
3571 
/* Teardown counterpart of gaudi2_special_blocks_iterator_config() */
static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
{
	gaudi2_special_blocks_free(hdev);
}
3576 
/* Skip-hook for the special-blocks iterator; on Gaudi2 no block/instance is
 * ever skipped, so this stub always returns false.
 */
static bool gaudi2_special_block_skip(struct hl_device *hdev,
		struct hl_special_blocks_cfg *special_blocks_cfg,
		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
{
	return false;
}
3583 
gaudi2_special_blocks_config(struct hl_device * hdev)3584 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3585 {
3586 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3587 	int i, rc;
3588 
3589 	/* Configure Special blocks */
3590 	prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
3591 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3592 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3593 			sizeof(*prop->special_blocks), GFP_KERNEL);
3594 	if (!prop->special_blocks)
3595 		return -ENOMEM;
3596 
3597 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3598 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3599 				sizeof(*prop->special_blocks));
3600 
3601 	/* Configure when to skip Special blocks */
3602 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3603 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3604 
3605 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3606 		prop->skip_special_blocks_cfg.block_types =
3607 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3608 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3609 		if (!prop->skip_special_blocks_cfg.block_types) {
3610 			rc = -ENOMEM;
3611 			goto free_special_blocks;
3612 		}
3613 
3614 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3615 				sizeof(gaudi2_iterator_skip_block_types));
3616 
3617 		prop->skip_special_blocks_cfg.block_types_len =
3618 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3619 	}
3620 
3621 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3622 		prop->skip_special_blocks_cfg.block_ranges =
3623 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3624 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3625 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3626 			rc = -ENOMEM;
3627 			goto free_skip_special_blocks_types;
3628 		}
3629 
3630 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3631 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3632 					&gaudi2_iterator_skip_block_ranges[i],
3633 					sizeof(struct range));
3634 
3635 		prop->skip_special_blocks_cfg.block_ranges_len =
3636 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3637 	}
3638 
3639 	return 0;
3640 
3641 free_skip_special_blocks_types:
3642 	kfree(prop->skip_special_blocks_cfg.block_types);
3643 free_special_blocks:
3644 	kfree(prop->special_blocks);
3645 
3646 	return rc;
3647 }
3648 
/* Thin wrapper kept as the iterator-level entry point for special-blocks setup */
static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
{
	return gaudi2_special_blocks_config(hdev);
}
3653 
gaudi2_test_queues_msgs_free(struct hl_device * hdev)3654 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3655 {
3656 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3657 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3658 	int i;
3659 
3660 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3661 		/* bail-out if this is an allocation failure point */
3662 		if (!msg_info[i].kern_addr)
3663 			break;
3664 
3665 		hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3666 		msg_info[i].kern_addr = NULL;
3667 	}
3668 }
3669 
gaudi2_test_queues_msgs_alloc(struct hl_device * hdev)3670 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3671 {
3672 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3673 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3674 	int i, rc;
3675 
3676 	/* allocate a message-short buf for each Q we intend to test */
3677 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3678 		msg_info[i].kern_addr =
3679 			(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3680 							GFP_KERNEL, &msg_info[i].dma_addr);
3681 		if (!msg_info[i].kern_addr) {
3682 			dev_err(hdev->dev,
3683 				"Failed to allocate dma memory for H/W queue %d testing\n", i);
3684 			rc = -ENOMEM;
3685 			goto err_exit;
3686 		}
3687 	}
3688 
3689 	return 0;
3690 
3691 err_exit:
3692 	gaudi2_test_queues_msgs_free(hdev);
3693 	return rc;
3694 }
3695 
/*
 * gaudi2_sw_init() - host-side (software-only) initialization of the device.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Allocates the ASIC-specific context, DMA pools, the CPU-accessible memory
 * region and the virtual MSI-X doorbell, then initializes user interrupts,
 * PCI memory regions, special blocks and queue-test buffers. Cleanup on
 * failure is done through the goto ladder in strict reverse order.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi2_sw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2;
	int i, rc;

	/* Allocate device structure */
	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
	if (!gaudi2)
		return -ENOMEM;

	/* Collect the fc_id of every valid, non-message event into hw_events[] */
	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
			continue;

		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
				GAUDI2_EVENT_SIZE);
			rc = -EINVAL;
			goto free_gaudi2_device;
		}

		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
	}

	/* Seeds must be non-zero; see gaudi2_get_non_zero_random_int() */
	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();

	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;

	hdev->asic_specific = gaudi2;

	/* Create DMA pool for small allocations.
	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
	 * PI/CI registers allocated from this pool have this restriction
	 */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi2_device;
	}

	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* gen_pool manages sub-allocations inside the CPU-accessible region */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
								&gaudi2->virt_msix_db_dma_addr);
	if (!gaudi2->virt_msix_db_cpu_addr) {
		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
		rc = -ENOMEM;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&gaudi2->hw_queues_lock);

	/* Scratchpad is placed right after the HMMU page tables and EDMA PQs */
	gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;

	gaudi2_user_mapped_blocks_init(hdev);

	/* Initialize user interrupts */
	gaudi2_user_interrupt_setup(hdev);

	hdev->supports_coresight = true;
	hdev->supports_sync_stream = true;
	hdev->supports_cb_mapping = true;
	hdev->supports_wait_for_multi_cs = false;

	prop->supports_compute_reset = true;

	/* Event queue sanity check added in FW version 1.11 */
	if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0)
		hdev->event_queue.check_eqe_index = false;
	else
		hdev->event_queue.check_eqe_index = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);

	rc = gaudi2_special_blocks_iterator_config(hdev);
	if (rc)
		goto free_virt_msix_db_mem;

	rc = gaudi2_test_queues_msgs_alloc(hdev);
	if (rc)
		goto special_blocks_free;

	hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ;

	return 0;

special_blocks_free:
	gaudi2_special_blocks_iterator_free(hdev);
free_virt_msix_db_mem:
	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi2_device:
	kfree(gaudi2);
	return rc;
}
3818 
/*
 * gaudi2_sw_fini() - release everything acquired by gaudi2_sw_init(),
 *                    in reverse order of allocation.
 * @hdev: pointer to the habanalabs device structure.
 *
 * Return: always 0.
 */
static int gaudi2_sw_fini(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	gaudi2_test_queues_msgs_free(hdev);

	gaudi2_special_blocks_iterator_free(hdev);

	/* Return the virtual MSI-X doorbell page before destroying its pool */
	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
						hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi2);

	return 0;
}
3841 
/* Stop a single QMAN: halt its PQF/CQF/CP pipes and its ARC CQF. */
static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
{
	u32 stop_mask = QM_GLBL_CFG1_PQF_STOP |
			QM_GLBL_CFG1_CQF_STOP |
			QM_GLBL_CFG1_CP_STOP;

	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, stop_mask);

	/* stop also the ARC */
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
}
3851 
/* Flush the PQF/CQF/CP pipes of a single QMAN. */
static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
{
	u32 flush_mask = QM_GLBL_CFG1_PQF_FLUSH |
			QM_GLBL_CFG1_CQF_FLUSH |
			QM_GLBL_CFG1_CP_FLUSH;

	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, flush_mask);
}
3858 
/* Flush the ARC CQF of a single QMAN. */
static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
}
3863 
3864 /**
3865  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3866  *
3867  * @hdev: pointer to the habanalabs device structure
3868  * @queue_id: queue to clear fence counters to
3869  * @skip_fence: if true set maximum fence value to all fence counters to avoid
3870  *              getting stuck on any fence value. otherwise set all fence
3871  *              counters to 0 (standard clear of fence counters)
3872  */
static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
						bool skip_fence)
{
	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
	u32 lbw_addr, lbw_size, fence_val;

	lbw_addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
	lbw_size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;

	/* When skipping fences we saturate every counter, so that whatever
	 * fence value a QM might be waiting on is already satisfied and it
	 * gets released; otherwise this is a standard clear to zero.
	 */
	fence_val = skip_fence ? U32_MAX : 0;

	gaudi2_memset_device_lbw(hdev, lbw_addr, lbw_size, fence_val);
}
3893 
/* Manually flush a QMAN: first release any fence it may be stuck on
 * (counters saturated via skip_fence=true), then flush the QM pipes and
 * the ARC CQF.
 */
static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
{
	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];

	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
	gaudi2_flush_qman_common(hdev, reg_base);
	gaudi2_flush_qman_arc_common(hdev, reg_base);
}
3902 
gaudi2_stop_dma_qmans(struct hl_device * hdev)3903 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3904 {
3905 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3906 	int dcore, inst;
3907 
3908 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3909 		goto stop_edma_qmans;
3910 
3911 	/* Stop CPs of PDMA QMANs */
3912 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3913 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3914 
3915 stop_edma_qmans:
3916 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3917 		return;
3918 
3919 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3920 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3921 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3922 			u32 qm_base;
3923 
3924 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3925 				continue;
3926 
3927 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3928 					inst * DCORE_EDMA_OFFSET;
3929 
3930 			/* Stop CPs of EDMA QMANs */
3931 			gaudi2_stop_qman_common(hdev, qm_base);
3932 		}
3933 	}
3934 }
3935 
gaudi2_stop_mme_qmans(struct hl_device * hdev)3936 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3937 {
3938 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3939 	u32 offset, i;
3940 
3941 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3942 
3943 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3944 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3945 			continue;
3946 
3947 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3948 	}
3949 }
3950 
gaudi2_stop_tpc_qmans(struct hl_device * hdev)3951 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3952 {
3953 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3954 	u32 reg_base;
3955 	int i;
3956 
3957 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3958 		return;
3959 
3960 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3961 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3962 			continue;
3963 
3964 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3965 		gaudi2_stop_qman_common(hdev, reg_base);
3966 	}
3967 }
3968 
gaudi2_stop_rot_qmans(struct hl_device * hdev)3969 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3970 {
3971 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3972 	u32 reg_base;
3973 	int i;
3974 
3975 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3976 		return;
3977 
3978 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3979 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3980 			continue;
3981 
3982 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3983 		gaudi2_stop_qman_common(hdev, reg_base);
3984 	}
3985 }
3986 
gaudi2_stop_nic_qmans(struct hl_device * hdev)3987 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3988 {
3989 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3990 	u32 reg_base, queue_id;
3991 	int i;
3992 
3993 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3994 		return;
3995 
3996 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3997 
3998 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3999 		if (!(hdev->nic_ports_mask & BIT(i)))
4000 			continue;
4001 
4002 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4003 		gaudi2_stop_qman_common(hdev, reg_base);
4004 	}
4005 }
4006 
/* Assert the HALT bit of a single DMA core. */
static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET,
		FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1));
}
4014 
gaudi2_dma_stall(struct hl_device * hdev)4015 static void gaudi2_dma_stall(struct hl_device *hdev)
4016 {
4017 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4018 	int dcore, inst;
4019 
4020 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4021 		goto stall_edma;
4022 
4023 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
4024 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
4025 
4026 stall_edma:
4027 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4028 		return;
4029 
4030 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4031 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4032 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4033 			u32 core_base;
4034 
4035 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4036 				continue;
4037 
4038 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
4039 					inst * DCORE_EDMA_OFFSET;
4040 
4041 			/* Stall CPs of EDMA QMANs */
4042 			gaudi2_stall_dma_common(hdev, core_base);
4043 		}
4044 	}
4045 }
4046 
gaudi2_mme_stall(struct hl_device * hdev)4047 static void gaudi2_mme_stall(struct hl_device *hdev)
4048 {
4049 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4050 	u32 offset, i;
4051 
4052 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
4053 
4054 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
4055 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4056 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
4057 }
4058 
gaudi2_tpc_stall(struct hl_device * hdev)4059 static void gaudi2_tpc_stall(struct hl_device *hdev)
4060 {
4061 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4062 	u32 reg_base;
4063 	int i;
4064 
4065 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4066 		return;
4067 
4068 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4069 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4070 			continue;
4071 
4072 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
4073 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
4074 	}
4075 }
4076 
gaudi2_rotator_stall(struct hl_device * hdev)4077 static void gaudi2_rotator_stall(struct hl_device *hdev)
4078 {
4079 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4080 	u32 reg_val;
4081 	int i;
4082 
4083 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4084 		return;
4085 
4086 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
4087 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
4088 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
4089 
4090 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4091 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4092 			continue;
4093 
4094 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
4095 	}
4096 }
4097 
/* Disable a single QMAN by clearing its global config register. */
static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
{
	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
}
4102 
gaudi2_disable_dma_qmans(struct hl_device * hdev)4103 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
4104 {
4105 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4106 	int dcore, inst;
4107 
4108 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4109 		goto stop_edma_qmans;
4110 
4111 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
4112 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
4113 
4114 stop_edma_qmans:
4115 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4116 		return;
4117 
4118 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4119 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4120 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4121 			u32 qm_base;
4122 
4123 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4124 				continue;
4125 
4126 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
4127 					inst * DCORE_EDMA_OFFSET;
4128 
4129 			/* Disable CPs of EDMA QMANs */
4130 			gaudi2_disable_qman_common(hdev, qm_base);
4131 		}
4132 	}
4133 }
4134 
gaudi2_disable_mme_qmans(struct hl_device * hdev)4135 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
4136 {
4137 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4138 	u32 offset, i;
4139 
4140 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
4141 
4142 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
4143 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4144 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
4145 }
4146 
gaudi2_disable_tpc_qmans(struct hl_device * hdev)4147 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
4148 {
4149 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4150 	u32 reg_base;
4151 	int i;
4152 
4153 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4154 		return;
4155 
4156 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4157 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4158 			continue;
4159 
4160 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4161 		gaudi2_disable_qman_common(hdev, reg_base);
4162 	}
4163 }
4164 
gaudi2_disable_rot_qmans(struct hl_device * hdev)4165 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4166 {
4167 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4168 	u32 reg_base;
4169 	int i;
4170 
4171 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4172 		return;
4173 
4174 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4175 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4176 			continue;
4177 
4178 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4179 		gaudi2_disable_qman_common(hdev, reg_base);
4180 	}
4181 }
4182 
gaudi2_disable_nic_qmans(struct hl_device * hdev)4183 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4184 {
4185 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4186 	u32 reg_base, queue_id;
4187 	int i;
4188 
4189 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4190 		return;
4191 
4192 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4193 
4194 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4195 		if (!(hdev->nic_ports_mask & BIT(i)))
4196 			continue;
4197 
4198 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4199 		gaudi2_disable_qman_common(hdev, reg_base);
4200 	}
4201 }
4202 
/* Restart the free-running PSOC timestamp counter from zero.
 * Write order matters: the counter is stopped before its two 32-bit halves
 * are cleared, then re-enabled.
 */
static void gaudi2_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
}
4215 
/* Stop the free-running PSOC timestamp counter (value is left intact). */
static void gaudi2_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
}
4221 
/* Map an MSI-X vector number to a human-readable name for request_irq(). */
static const char *gaudi2_irq_name(u16 irq_number)
{
	if (irq_number == GAUDI2_IRQ_NUM_EVENT_QUEUE)
		return "gaudi2 cpu eq";

	if (irq_number == GAUDI2_IRQ_NUM_COMPLETION)
		return "gaudi2 completion";

	if (irq_number >= GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM &&
			irq_number <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM)
		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];

	if (irq_number == GAUDI2_IRQ_NUM_TPC_ASSERT)
		return "gaudi2 tpc assert";

	if (irq_number == GAUDI2_IRQ_NUM_UNEXPECTED_ERROR)
		return "gaudi2 unexpected error";

	if (irq_number >= GAUDI2_IRQ_NUM_USER_FIRST && irq_number <= GAUDI2_IRQ_NUM_USER_LAST)
		return "gaudi2 user completion";

	if (irq_number == GAUDI2_IRQ_NUM_EQ_ERROR)
		return "gaudi2 eq error";

	return "invalid";
}
4243 
/* Free all decoder MSI-X vectors up to (but not including) max_irq_num. */
static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
{
	struct hl_dec *dec;
	int i, relative_idx;

	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
		int irq = pci_irq_vector(hdev->pdev, i);

		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
		dec = hdev->dec + relative_idx / 2;

		/* Odd vectors were registered with the hl_dec itself (abnormal
		 * interrupt), even vectors with the matching user_interrupt
		 * entry - free each with the same cookie it was requested with.
		 */
		if (relative_idx % 2)
			free_irq(irq, (void *) dec);
		else
			free_irq(irq, (void *) &hdev->user_interrupt[dec->core_id]);
	}
}
4264 
/* Request the MSI-X vectors of all decoders (normal + abnormal pairs).
 * On failure, only the vectors requested so far (irq_init_cnt of them) are
 * released.
 *
 * Return: 0 on success, negative errno from request_irq() otherwise.
 */
static int gaudi2_dec_enable_msix(struct hl_device *hdev)
{
	int rc, i, irq_init_cnt, irq, relative_idx;
	struct hl_dec *dec;

	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
			i++, irq_init_cnt++) {

		irq = pci_irq_vector(hdev->pdev, i);
		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;

		/* We pass different structures depending on the irq handler. For the abnormal
		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
		 * user_interrupt entry
		 *
		 * TODO: change the dec abnrm to threaded irq
		 */

		dec = hdev->dec + relative_idx / 2;
		if (relative_idx % 2) {
			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
						gaudi2_irq_name(i), (void *) dec);
		} else {
			rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
					(void *) &hdev->user_interrupt[dec->core_id]);
		}

		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_dec_irqs;
		}
	}

	return 0;

free_dec_irqs:
	/* irq_init_cnt counts only successfully requested vectors */
	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
	return rc;
}
4305 
gaudi2_enable_msix(struct hl_device * hdev)4306 static int gaudi2_enable_msix(struct hl_device *hdev)
4307 {
4308 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4309 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4310 	int rc, irq, i, j, user_irq_init_cnt;
4311 	struct hl_cq *cq;
4312 
4313 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4314 		return 0;
4315 
4316 	hl_init_cpu_for_irq(hdev);
4317 
4318 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4319 					PCI_IRQ_MSIX);
4320 	if (rc < 0) {
4321 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4322 			GAUDI2_MSIX_ENTRIES, rc);
4323 		return rc;
4324 	}
4325 
4326 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4327 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4328 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4329 	if (rc) {
4330 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4331 		goto free_irq_vectors;
4332 	}
4333 
4334 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4335 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4336 			&hdev->event_queue);
4337 	if (rc) {
4338 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4339 		goto free_completion_irq;
4340 	}
4341 
4342 	rc = gaudi2_dec_enable_msix(hdev);
4343 	if (rc) {
4344 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
4345 		goto free_event_irq;
4346 	}
4347 
4348 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4349 	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4350 					gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
4351 					&hdev->tpc_interrupt);
4352 	if (rc) {
4353 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4354 		goto free_dec_irq;
4355 	}
4356 
4357 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4358 	rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4359 					gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4360 					&hdev->unexpected_error_interrupt);
4361 	if (rc) {
4362 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4363 		goto free_tpc_irq;
4364 	}
4365 
4366 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4367 			user_irq_init_cnt < prop->user_interrupt_count;
4368 			i++, j++, user_irq_init_cnt++) {
4369 
4370 		irq = pci_irq_vector(hdev->pdev, i);
4371 		hl_set_irq_affinity(hdev, irq);
4372 		rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4373 				&hdev->user_interrupt[j]);
4374 		if (rc) {
4375 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4376 			goto free_user_irq;
4377 		}
4378 	}
4379 
4380 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4381 	rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
4382 					IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
4383 					hdev);
4384 	if (rc) {
4385 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4386 		goto free_user_irq;
4387 	}
4388 
4389 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4390 
4391 	return 0;
4392 
4393 free_user_irq:
4394 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4395 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4396 
4397 		irq = pci_irq_vector(hdev->pdev, i);
4398 		irq_set_affinity_and_hint(irq, NULL);
4399 		free_irq(irq, &hdev->user_interrupt[j]);
4400 	}
4401 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4402 	free_irq(irq, &hdev->unexpected_error_interrupt);
4403 free_tpc_irq:
4404 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4405 	free_irq(irq, &hdev->tpc_interrupt);
4406 free_dec_irq:
4407 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4408 free_event_irq:
4409 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4410 	free_irq(irq, cq);
4411 
4412 free_completion_irq:
4413 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4414 	free_irq(irq, cq);
4415 
4416 free_irq_vectors:
4417 	pci_free_irq_vectors(hdev->pdev);
4418 
4419 	return rc;
4420 }
4421 
gaudi2_sync_irqs(struct hl_device * hdev)4422 static void gaudi2_sync_irqs(struct hl_device *hdev)
4423 {
4424 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4425 	int i, j;
4426 	int irq;
4427 
4428 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4429 		return;
4430 
4431 	/* Wait for all pending IRQs to be finished */
4432 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4433 
4434 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4435 		irq = pci_irq_vector(hdev->pdev, i);
4436 		synchronize_irq(irq);
4437 	}
4438 
4439 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4440 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4441 
4442 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4443 										i++, j++) {
4444 		irq = pci_irq_vector(hdev->pdev, i);
4445 		synchronize_irq(irq);
4446 	}
4447 
4448 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4449 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
4450 }
4451 
/*
 * gaudi2_disable_msix() - free all MSI-X interrupts and release the vectors.
 * @hdev: habanalabs device structure.
 *
 * Reverses the MSI-X setup flow: IRQs are synced first so no handler is
 * still in flight, then each vector is freed, and finally the PCI MSI-X
 * vectors themselves are released.
 */
static void gaudi2_disable_msix(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct hl_cq *cq;
	int irq, i, j, k;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
		return;

	gaudi2_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);

	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
	free_irq(irq, &hdev->tpc_interrupt);

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
	free_irq(irq, &hdev->unexpected_error_interrupt);

	/* j starts at user_dec_intr_count: the user_interrupt array holds the
	 * decoder interrupts first, matching the offset used at request time.
	 */
	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {

		irq = pci_irq_vector(hdev->pdev, i);
		/* Drop the affinity hint that was set when the IRQ was requested */
		irq_set_affinity_and_hint(irq, NULL);
		free_irq(irq, &hdev->user_interrupt[j]);
	}

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
	free_irq(irq, cq);

	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
	free_irq(irq, hdev);

	pci_free_irq_vectors(hdev->pdev);

	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
}
4494 
/*
 * gaudi2_stop_dcore_dec() - gracefully stop all decoders of a single dcore.
 * @hdev: habanalabs device structure.
 * @dcore_id: index of the dcore whose decoders should be stopped.
 *
 * For each enabled decoder, request a graceful stop via the bridge control
 * register and poll until the decoder indicates its traffic has drained,
 * so a core reset can be applied safely afterwards.
 */
static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	/* Simulation platform (PLDM) is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		/* Skip decoders that are disabled (e.g. binned out) */
		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;

		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);

		/* Request a graceful stop from the decoder bridge */
		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from decoder stops
		 * before apply core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from DCORE%d Decoder %d\n",
				dcore_id, dec_id);
	}
}
4534 
/*
 * gaudi2_stop_pcie_dec() - gracefully stop the PCIe-attached decoders.
 * @hdev: habanalabs device structure.
 *
 * Same flow as gaudi2_stop_dcore_dec(), but for the decoders that sit on
 * the PCIe block rather than inside a dcore.
 */
static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
{
	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
	int rc;

	/* Simulation platform (PLDM) is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;

	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
		/* PCIe decoder enable bits live above the dcore decoder bits */
		dec_bit = PCIE_DEC_SHIFT + dec_id;
		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
			continue;

		offset = dec_id * PCIE_VDEC_OFFSET;

		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);

		/* Request a graceful stop from the decoder bridge */
		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);

		/* Wait till all traffic from decoder stops
		 * before apply core reset.
		 */
		rc = hl_poll_timeout(
				hdev,
				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
				graceful,
				(graceful & graceful_pend_mask),
				100,
				timeout_usec);
		if (rc)
			dev_err(hdev->dev,
				"Failed to stop traffic from PCIe Decoder %d\n",
				dec_id);
	}
}
4574 
gaudi2_stop_dec(struct hl_device * hdev)4575 static void gaudi2_stop_dec(struct hl_device *hdev)
4576 {
4577 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4578 	int dcore_id;
4579 
4580 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4581 		return;
4582 
4583 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4584 		gaudi2_stop_dcore_dec(hdev, dcore_id);
4585 
4586 	gaudi2_stop_pcie_dec(hdev);
4587 }
4588 
/* Request an ARC core to either run or halt via its AUX RUN_HALT register */
static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	u32 base = gaudi2_arc_blocks_bases[cpu_id];
	u32 req;

	if (run_mode == HL_ENGINE_CORE_RUN)
		req = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
	else
		req = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);

	WREG32(base + ARC_HALT_REQ_OFFSET, req);
}
4601 
gaudi2_halt_arcs(struct hl_device * hdev)4602 static void gaudi2_halt_arcs(struct hl_device *hdev)
4603 {
4604 	u16 arc_id;
4605 
4606 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4607 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4608 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4609 	}
4610 }
4611 
/*
 * Poll the ARC RUN_HALT ack register until the requested mode is confirmed.
 * On success the pending request bit is cleared.
 * Returns 0 on ack, -ETIMEDOUT (from hl_poll_timeout) otherwise.
 */
static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
{
	u32 poll_timeout_usec = 100000, reg_base, ack_mask, val;
	int rc;

	if (hdev->pldm)
		poll_timeout_usec *= 100;

	reg_base = gaudi2_arc_blocks_bases[cpu_id];
	ack_mask = (run_mode == HL_ENGINE_CORE_RUN) ?
			ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK :
			ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;

	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
				val, ((val & ack_mask) == ack_mask),
				1000, poll_timeout_usec);
	if (!rc) {
		/* Ack received - drop the outstanding request */
		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
	}

	return rc;
}
4638 
gaudi2_reset_arcs(struct hl_device * hdev)4639 static void gaudi2_reset_arcs(struct hl_device *hdev)
4640 {
4641 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4642 	u16 arc_id;
4643 
4644 	if (!gaudi2)
4645 		return;
4646 
4647 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4648 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4649 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4650 }
4651 
gaudi2_nic_qmans_manual_flush(struct hl_device * hdev)4652 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4653 {
4654 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4655 	u32 queue_id;
4656 	int i;
4657 
4658 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4659 		return;
4660 
4661 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4662 
4663 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4664 		if (!(hdev->nic_ports_mask & BIT(i)))
4665 			continue;
4666 
4667 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4668 	}
4669 }
4670 
/*
 * gaudi2_set_engine_cores() - set the run/halt mode of multiple ARC cores.
 * @hdev: habanalabs device structure.
 * @core_ids: array of ARC core IDs to configure.
 * @num_cores: number of entries in @core_ids.
 * @core_command: HL_ENGINE_CORE_RUN or HL_ENGINE_CORE_HALT.
 *
 * The request is issued to all enabled cores first and only then acked,
 * so the cores transition in parallel rather than one at a time.
 *
 * Return: 0 on success, -EIO if any core failed to ack the requested mode.
 */
static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
					u32 num_cores, u32 core_command)
{
	u32 i;
	int rc;

	/* Issue the run/halt request to every enabled core */
	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
	}

	/* Wait for each enabled core to ack the requested mode */
	for (i = 0 ; i < num_cores ; i++) {
		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);

			if (rc) {
				dev_err(hdev->dev, "failed to %s arc: %d\n",
					(core_command == HL_ENGINE_CORE_HALT) ?
					"HALT" : "RUN", core_ids[i]);
				/* Return a proper errno instead of bare -1
				 * (which reads as -EPERM to callers).
				 */
				return -EIO;
			}
		}
	}

	return 0;
}
4696 
/*
 * Stall or resume a single TPC engine.
 * Disabled/binned TPCs are silently skipped. Always returns 0.
 */
static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 base, tpc_id, stall_val;

	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
		return 0;

	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
		return 0;

	stall_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
	WREG32(base + TPC_CFG_STALL_OFFSET, stall_val);

	if (engine_command == HL_ENGINE_RESUME) {
		/* On resume, also set the debug-exit bit in the EML DBG_CNT register */
		base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
		RMWREG32(base + TPC_EML_CFG_DBG_CNT_OFFSET, 0x1,
				DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
	}

	return 0;
}
4723 
/*
 * Stall or resume a single MME engine.
 * MME instances that were never brought up are silently skipped.
 * Always returns 0.
 */
static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mme_id, stall_val;

	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
		return 0;

	stall_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	WREG32(gaudi2_mme_ctrl_lo_blocks_bases[mme_id] + MME_CTRL_LO_QM_STALL_OFFSET,
			stall_val);

	return 0;
}
4741 
/*
 * gaudi2_set_edma_engine_mode() - stall or resume a single EDMA engine.
 * @hdev: habanalabs device structure.
 * @engine_id: global engine ID of the EDMA.
 * @engine_command: HL_ENGINE_STALL or HL_ENGINE_RESUME.
 *
 * Return: always 0 (disabled/binned engines are silently skipped).
 */
static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_base, reg_addr, reg_val, edma_id;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
		return 0;

	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
		return 0;

	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
	WREG32(reg_addr, reg_val);

	if (engine_command == HL_ENGINE_STALL) {
		/* On stall, issue a second write with HALT and FLUSH set together */
		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
		WREG32(reg_addr, reg_val);
	}

	return 0;
}
4768 
/*
 * Apply a stall/resume command to a list of engines, dispatching per
 * engine type (TPC / MME / EDMA).
 * Returns 0 on success, -EINVAL on an unknown engine ID, or the first
 * non-zero status from a per-engine setter.
 */
static int gaudi2_set_engine_modes(struct hl_device *hdev,
		u32 *engine_ids, u32 num_engines, u32 engine_command)
{
	int (*set_mode)(struct hl_device *hdev, u32 engine_id, u32 engine_command);
	int idx, rc;

	for (idx = 0 ; idx < num_engines ; idx++) {
		/* Map the engine ID to the matching per-type mode setter */
		switch (engine_ids[idx]) {
		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
			set_mode = gaudi2_set_tpc_engine_mode;
			break;
		case GAUDI2_DCORE0_ENGINE_ID_MME:
		case GAUDI2_DCORE1_ENGINE_ID_MME:
		case GAUDI2_DCORE2_ENGINE_ID_MME:
		case GAUDI2_DCORE3_ENGINE_ID_MME:
			set_mode = gaudi2_set_mme_engine_mode;
			break;
		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
			set_mode = gaudi2_set_edma_engine_mode;
			break;
		default:
			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[idx]);
			return -EINVAL;
		}

		rc = set_mode(hdev, engine_ids[idx], engine_command);
		if (rc)
			return rc;
	}

	return 0;
}
4811 
/*
 * Dispatch an engine command: core run/halt commands take the ARC-core
 * path while stall/resume commands take the per-engine path.
 */
static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
					u32 num_engines, u32 engine_command)
{
	if (engine_command == HL_ENGINE_CORE_HALT || engine_command == HL_ENGINE_CORE_RUN)
		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);

	if (engine_command == HL_ENGINE_STALL || engine_command == HL_ENGINE_RESUME)
		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);

	dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
	return -EINVAL;
}
4829 
/*
 * gaudi2_halt_engines() - stop all compute engines ahead of a reset.
 * @hdev: habanalabs device structure.
 * @hard_reset: true if a hard reset follows (MSI-X is torn down as well).
 * @fw_reset: true if FW performs the reset itself - engine stopping is
 *            then skipped and only the interrupt handling is addressed.
 *
 * Ordering matters: QMANs are stopped first so no new work is dispatched,
 * then the engines themselves are stalled, and only afterwards are the
 * QMANs disabled. Settle delays are inserted between the stages.
 */
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	/* Simulation platform (PLDM) needs a much longer settle time */
	if (hdev->pldm)
		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	/* Stage 1: stop all QMANs so no new work is dispatched */
	gaudi2_stop_dma_qmans(hdev);
	gaudi2_stop_mme_qmans(hdev);
	gaudi2_stop_tpc_qmans(hdev);
	gaudi2_stop_rot_qmans(hdev);
	gaudi2_stop_nic_qmans(hdev);
	msleep(wait_timeout_ms);

	/* Stage 2: stall the engines themselves */
	gaudi2_halt_arcs(hdev);
	gaudi2_dma_stall(hdev);
	gaudi2_mme_stall(hdev);
	gaudi2_tpc_stall(hdev);
	gaudi2_rotator_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi2_stop_dec(hdev);

	/*
	 * in case of soft reset do a manual flush for QMANs (currently called
	 * only for NIC QMANs
	 */
	if (!hard_reset)
		gaudi2_nic_qmans_manual_flush(hdev);

	/* Stage 3: disable the QMANs */
	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);
	gaudi2_disable_timestamp(hdev);

skip_engines:
	if (hard_reset) {
		gaudi2_disable_msix(hdev);
		return;
	}

	gaudi2_sync_irqs(hdev);
}
4881 
gaudi2_init_firmware_preload_params(struct hl_device * hdev)4882 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4883 {
4884 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4885 
4886 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4887 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4888 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4889 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4890 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4891 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4892 	pre_fw_load->wait_for_preboot_extended_timeout =
4893 		GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
4894 }
4895 
gaudi2_init_firmware_loader(struct hl_device * hdev)4896 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4897 {
4898 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4899 	struct dynamic_fw_load_mgr *dynamic_loader;
4900 	struct cpu_dyn_regs *dyn_regs;
4901 
4902 	/* fill common fields */
4903 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4904 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4905 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4906 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4907 	fw_loader->skip_bmc = false;
4908 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4909 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4910 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4911 
4912 	/* here we update initial values for few specific dynamic regs (as
4913 	 * before reading the first descriptor from FW those value has to be
4914 	 * hard-coded). in later stages of the protocol those values will be
4915 	 * updated automatically by reading the FW descriptor so data there
4916 	 * will always be up-to-date
4917 	 */
4918 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4919 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4920 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4921 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4922 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4923 }
4924 
gaudi2_init_cpu(struct hl_device * hdev)4925 static int gaudi2_init_cpu(struct hl_device *hdev)
4926 {
4927 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4928 	int rc;
4929 
4930 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4931 		return 0;
4932 
4933 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4934 		return 0;
4935 
4936 	rc = hl_fw_init_cpu(hdev);
4937 	if (rc)
4938 		return rc;
4939 
4940 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4941 
4942 	return 0;
4943 }
4944 
/*
 * gaudi2_init_cpu_queues() - configure the CPU PQ/EQ/CQ and handshake with FW.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: how long (usec) to wait for the device CPU to ack readiness.
 *
 * Programs the queue base addresses and lengths into the CPU interface
 * registers, signals readiness via the GIC, then polls until the device CPU
 * reports PQ_INIT_STATUS_READY_FOR_HOST.
 *
 * Return: 0 on success, -EIO on handshake timeout.
 */
static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct cpu_dyn_regs *dyn_regs;
	struct hl_eq *eq;
	u32 status;
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	/* Program the DMA addresses of the PQ, EQ and CPU-accessible memory */
	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);

	/* Let the ARC know we are ready as it is now handling those queues  */

	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);

	/* Poll until the device CPU flips the status to READY_FOR_HOST */
	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);

	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
5013 
/* Program base address, size, PI and CI for every PQ of one QMAN */
static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	u32 pq_id, pq_offset;
	u64 pq_addr;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		struct hl_hw_queue *q = &hdev->kernel_queues[queue_id_base + pq_id];

		pq_offset = pq_id * 4;

		/* A queue backed by device memory uses its DRAM address,
		 * otherwise the host DMA address.
		 */
		pq_addr = q->dram_bd ? q->pq_dram_address : q->bus_address;

		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(pq_addr));
		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(pq_addr));
		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
	}
}
5040 
/* Set the monitor/SOB message base addresses for every CP of one QMAN */
static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
{
	u64 mtr_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
	u64 so_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
	u32 cp_id, cp_offset;

	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN ; cp_id++) {
		cp_offset = cp_id * 4;

		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset,
				lower_32_bits(mtr_addr));
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset,
				upper_32_bits(mtr_addr));
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset,
				lower_32_bits(so_addr));
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset,
				upper_32_bits(so_addr));
	}

	/* allow QMANs to accept work from ARC CQF */
	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
}
5062 
/* Configure the PQ-completion path of one QMAN and enable H/W completion */
static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 sob_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
	u32 pq_id, pq_offset;

	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
		pq_offset = pq_id * 4;

		/* The HBW completion path is not needed - point it at the scratchpad */
		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(gaudi2->scratchpad_bus_address));
		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));

		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
		/* LBW completion writes QM_PQC_LBW_WDATA to a sync object */
		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset,
				lower_32_bits(sob_addr));
		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset,
				upper_32_bits(sob_addr));
	}

	/* Enable QMAN H/W completion */
	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
}
5092 
/*
 * Map a queue ID to the dynamic GIC IRQ-control register of its engine
 * family. Returns 0 for an unknown queue ID.
 * Adjacent empty case labels need no explicit fallthrough annotation.
 */
static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;

	switch (queue_id_base) {
	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		return le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		return le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		return le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
		return le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
		return le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
	default:
		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
		return 0;
	}
}
5141 
/*
 * gaudi2_init_qman_common() - common QMAN global configuration and enable.
 * @hdev: habanalabs device structure.
 * @reg_base: base address of the QMAN block.
 * @queue_id_base: first queue ID served by this QMAN.
 *
 * Sets protection bits, error-reporting address/payload, arbiter settings,
 * and finally enables the QMAN. Must run after the PQ/CP/PQC setup.
 */
static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
					u32 queue_id_base)
{
	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
	int map_table_entry;

	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);

	/* Route QMAN errors to the GIC register of this queue's engine family */
	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));

	/* The error payload is the async event ID of this queue */
	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
		gaudi2_irq_map_table[map_table_entry].cpu_id);

	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);

	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);

	/* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow user to
	 * access some of the CPs.
	 * PDMA0: CP2/3 are reserved for the ARC usage.
	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
	 */
	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
	else
		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
}
5177 
/* Full bring-up of one QMAN: PQs, CPs, PQC, then global config/enable */
static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
		u32 queue_id_base)
{
	u32 pq;

	/* All PQs of this QMAN complete on the reserved CS-completion CQ */
	for (pq = 0 ; pq < NUM_OF_PQ_PER_QMAN ; pq++)
		hdev->kernel_queues[queue_id_base + pq].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;

	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_cp(hdev, reg_base);
	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
}
5191 
/*
 * Configure and enable one DMA core: protection bits, error-message
 * routing, and the enable bit.
 */
static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
				u32 dma_core_id, bool is_secure)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
	u32 prot_bits = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
	int event_entry;

	if (is_secure)
		prot_bits |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;

	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot_bits);

	/* Route DMA-core error messages to the GIC via the dynamic register */
	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
			lower_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
			upper_32_bits(CFG_BASE + irq_handler_offset));

	/* The error payload is this core's async event ID */
	event_entry = gaudi2_dma_core_async_event_id[dma_core_id];
	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
		gaudi2_irq_map_table[event_entry].cpu_id);

	/* Enable the DMA channel */
	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
}
5221 
gaudi2_init_kdma(struct hl_device * hdev)5222 static void gaudi2_init_kdma(struct hl_device *hdev)
5223 {
5224 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5225 	u32 reg_base;
5226 
5227 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5228 		return;
5229 
5230 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5231 
5232 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5233 
5234 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5235 }
5236 
gaudi2_init_pdma(struct hl_device * hdev)5237 static void gaudi2_init_pdma(struct hl_device *hdev)
5238 {
5239 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5240 	u32 reg_base;
5241 
5242 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5243 		return;
5244 
5245 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5246 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5247 
5248 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5249 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5250 
5251 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5252 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5253 
5254 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5255 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5256 
5257 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5258 }
5259 
/*
 * gaudi2_init_edma_instance() - Initialize a single EDMA instance.
 * @hdev: pointer to habanalabs device structure.
 * @seq: EDMA sequence number across all dcores.
 *
 * Configures the instance's DMA core and then its QMAN.
 */
static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
{
	u32 core_id = DMA_CORE_ID_EDMA0 + seq;
	u32 qman_id = edma_stream_base[seq];

	gaudi2_init_dma_core(hdev, gaudi2_dma_core_blocks_bases[core_id], core_id, false);
	gaudi2_init_qman(hdev, gaudi2_qm_blocks_bases[qman_id], qman_id);
}
5273 
gaudi2_init_edma(struct hl_device * hdev)5274 static void gaudi2_init_edma(struct hl_device *hdev)
5275 {
5276 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5277 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5278 	int dcore, inst;
5279 
5280 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5281 		return;
5282 
5283 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5284 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5285 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5286 
5287 			if (!(prop->edma_enabled_mask & BIT(seq)))
5288 				continue;
5289 
5290 			gaudi2_init_edma_instance(hdev, seq);
5291 
5292 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5293 		}
5294 	}
5295 }
5296 
5297 /*
5298  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5299  * @hdev: pointer to habanalabs device structure.
5300  * @sob_id: sync object ID.
5301  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5302  * @interrupt_id: interrupt ID.
5303  *
5304  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5305  * write directly to the HBW host memory of the virtual MSI-X doorbell.
5306  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5307  *
5308  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5309  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5310  * completion, by decrementing the sync object value and re-arming the monitor.
5311  */
static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
							u32 first_mon_id, u32 interrupt_id)
{
	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 addr;
	u8 mask;

	/* Reset the SOB value (SOB registers are u32-sized, hence the byte offset) */
	sob_offset = sob_id * sizeof(u32);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure 3 monitors:
	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
	 * 2. Decrement SOB value by 1.
	 * 3. Re-arm the master monitor.
	 *
	 * The two helper monitors (2 and 3) are configured first; the master
	 * monitor is configured and armed last, once its payload chain is
	 * fully in place.
	 */

	first_mon_offset = first_mon_id * sizeof(u32);

	/* 2nd monitor: Decrement SOB value by 1 */
	mon_offset = first_mon_offset + sizeof(u32);

	/* Payload target is the SOB object itself */
	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	/* VAL=0x7FFF with SIGN set encodes "-1"; INC makes it an increment op */
	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 3rd monitor: Re-arm the master monitor */
	mon_offset = first_mon_offset + 2 * sizeof(u32);

	/* Payload target is the master monitor's ARM register */
	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	/* Watch only this SOB within its group of 8, fire when value >= 1 */
	sob_group = sob_id / 8;
	mask = ~BIT(sob_id & 0x7);
	mode = 0; /* comparison mode is "greater than or equal to" */
	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);

	payload = arm;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
	mon_offset = first_mon_offset;

	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);

	addr = gaudi2->virt_msix_db_dma_addr;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));

	payload = interrupt_id;
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);

	/* Arm the master monitor last, after all 3 messages are set up */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
}
5377 
gaudi2_prepare_sm_for_virt_msix_db(struct hl_device * hdev)5378 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5379 {
5380 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5381 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5382 
5383 	/* Decoder normal/abnormal interrupts */
5384 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5385 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5386 			continue;
5387 
5388 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5389 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5390 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5391 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5392 
5393 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5394 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5395 		interrupt_id += 1;
5396 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5397 	}
5398 }
5399 
/*
 * gaudi2_init_sm() - Initialize the sync manager.
 * @hdev: pointer to habanalabs device structure.
 *
 * Configures the completion monitors, the CQ0 doorbell, the completion
 * queue base addresses/sizes, the kernel ASID, and finally the monitors
 * used for the virtual MSI-X doorbell.
 */
static void gaudi2_init_sm(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 cq_address;
	u32 reg_val;
	int i;

	/* Enable HBW/LBW CQ for completion monitors */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);

	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Enable only HBW CQ for KDMA completion monitor.
	 * NOTE: 'i' equals GAUDI2_MAX_PENDING_CS here - the KDMA monitor is
	 * the one immediately following the CS completion monitors.
	 */
	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);

	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);

	/* Program each reserved CQ's base address and size */
	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
		cq_address =
			hdev->completion_queue[i].bus_address;

		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
							lower_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
							upper_32_bits(cq_address));
		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
							ilog2(HL_CQ_SIZE_IN_BYTES));
	}

	/* Configure kernel ASID and MMU BP*/
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);

	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
	gaudi2_prepare_sm_for_virt_msix_db(hdev);
}
5442 
/*
 * gaudi2_init_mme_acc() - Configure an MME accumulator block.
 * @hdev: pointer to habanalabs device structure.
 * @reg_base: config base address of the MME ACC block.
 *
 * Sets the interrupt mask (WBC error response unmasked, the various
 * inf/nan arithmetic conditions masked), programs the LFSR polynomial,
 * and loads the previously generated per-device LFSR seeds.
 */
static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 reg_val;
	int i;

	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);

	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);

	/* Seeds are written one at a time through a select/data register pair */
	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
	}
}
5464 
/*
 * gaudi2_init_dcore_mme() - Initialize the MME of a single dcore.
 * @hdev: pointer to habanalabs device structure.
 * @dcore_id: index of the dcore whose MME is initialized.
 * @config_qman_only: when true, skip the MME ACC configuration and only
 *                    initialize the QMAN.
 */
static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
							bool config_qman_only)
{
	/* Base queue ID of each dcore's MME QMAN */
	static const u32 dcore_mme_qid_base[NUM_OF_DCORES] = {
		GAUDI2_QUEUE_ID_DCORE0_MME_0_0,
		GAUDI2_QUEUE_ID_DCORE1_MME_0_0,
		GAUDI2_QUEUE_ID_DCORE2_MME_0_0,
		GAUDI2_QUEUE_ID_DCORE3_MME_0_0,
	};
	u32 queue_id_base, reg_base;

	if (dcore_id < 0 || dcore_id >= NUM_OF_DCORES) {
		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
		return;
	}

	queue_id_base = dcore_mme_qid_base[dcore_id];

	if (!config_qman_only) {
		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
		gaudi2_init_mme_acc(hdev, reg_base);
	}

	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
	gaudi2_init_qman(hdev, reg_base, queue_id_base);
}
5496 
gaudi2_init_mme(struct hl_device * hdev)5497 static void gaudi2_init_mme(struct hl_device *hdev)
5498 {
5499 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5500 	int i;
5501 
5502 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5503 		return;
5504 
5505 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5506 		gaudi2_init_dcore_mme(hdev, i, false);
5507 
5508 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5509 	}
5510 }
5511 
/*
 * gaudi2_init_tpc_cfg() - One-time configuration of a TPC's CFG block.
 * @hdev: pointer to habanalabs device structure.
 * @reg_base: config base address of the TPC CFG block.
 */
static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
{
	/* Mask arithmetic and QM interrupts in TPC */
	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);

	/* Set 16 cache lines (field value 2) for instruction-cache fetch */
	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
}
5521 
/* Per-dcore base queue IDs of the TPC QMANs, passed through the TPC
 * iterator context to gaudi2_init_tpc_config().
 */
struct gaudi2_tpc_init_cfg_data {
	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
};
5525 
/*
 * gaudi2_init_tpc_config() - TPC iterator callback that initializes one TPC.
 * @hdev: pointer to habanalabs device structure.
 * @dcore: dcore index of the TPC instance.
 * @inst: instance index within the dcore.
 * @offset: register offset of this instance relative to DCORE0 TPC0.
 * @ctx: iterator context carrying struct gaudi2_tpc_init_cfg_data.
 */
static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
					u32 offset, struct iterate_module_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
	bool is_dcore0_extra_tpc = (dcore == 0 && inst == (NUM_DCORE0_TPC - 1));
	u32 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
	/* The extra TPC on dcore 0 gets the last sequence number */
	u8 seq = is_dcore0_extra_tpc ? NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE :
					dcore * NUM_OF_TPC_PER_DCORE + inst;

	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);

	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
}
5547 
gaudi2_init_tpc(struct hl_device * hdev)5548 static void gaudi2_init_tpc(struct hl_device *hdev)
5549 {
5550 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5551 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5552 	struct iterate_module_ctx tpc_iter;
5553 
5554 	if (!hdev->asic_prop.tpc_enabled_mask)
5555 		return;
5556 
5557 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5558 		return;
5559 
5560 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5561 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5562 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5563 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5564 	tpc_iter.fn = &gaudi2_init_tpc_config;
5565 	tpc_iter.data = &init_cfg_data;
5566 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5567 }
5568 
gaudi2_init_rotator(struct hl_device * hdev)5569 static void gaudi2_init_rotator(struct hl_device *hdev)
5570 {
5571 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5572 	u32 i, reg_base, queue_id;
5573 
5574 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5575 
5576 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5577 		reg_base = gaudi2_qm_blocks_bases[queue_id];
5578 		gaudi2_init_qman(hdev, reg_base, queue_id);
5579 
5580 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5581 	}
5582 }
5583 
/*
 * gaudi2_init_vdec_brdg_ctrl() - Route a decoder's VCMD interrupts to SOBs.
 * @hdev: pointer to habanalabs device structure.
 * @base_addr: base address of the decoder's bridge-control block.
 * @decoder_id: global decoder index, used to pick its reserved SOBs.
 *
 * Both the normal and abnormal VCMD interrupts are converted to an LBW
 * write that increments the decoder's dedicated sync object by one.
 */
static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
{
	u32 nrm_sob = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
	u32 abnrm_sob = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;

	/* VCMD normal interrupt -> increment the "normal" SOB */
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + nrm_sob * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);

	/* VCMD abnormal interrupt -> increment the "abnormal" SOB */
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + abnrm_sob * sizeof(u32));
	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
}
5600 
gaudi2_init_dec(struct hl_device * hdev)5601 static void gaudi2_init_dec(struct hl_device *hdev)
5602 {
5603 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5604 	u32 dcore_id, dec_id, dec_bit;
5605 	u64 base_addr;
5606 
5607 	if (!hdev->asic_prop.decoder_enabled_mask)
5608 		return;
5609 
5610 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5611 		return;
5612 
5613 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5614 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5615 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5616 
5617 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5618 				continue;
5619 
5620 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
5621 					BRDG_CTRL_BLOCK_OFFSET +
5622 					dcore_id * DCORE_OFFSET +
5623 					dec_id * DCORE_VDEC_OFFSET;
5624 
5625 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5626 
5627 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5628 		}
5629 
5630 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5631 		dec_bit = PCIE_DEC_SHIFT + dec_id;
5632 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5633 			continue;
5634 
5635 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5636 				dec_id * DCORE_VDEC_OFFSET;
5637 
5638 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5639 
5640 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5641 	}
5642 }
5643 
/*
 * gaudi2_mmu_update_asid_hop0_addr() - Program the hop0 page-table address
 * of a single ASID into an STLB.
 * @hdev: pointer to habanalabs device structure.
 * @stlb_base: base address of the target STLB block.
 * @asid: ASID whose hop0 address is being set.
 * @phys_addr: physical address of the ASID's hop0 table.
 *
 * Return: 0 on success, non-zero if the STLB busy bit did not clear in time.
 */
static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 stlb_base, u32 asid, u64 phys_addr)
{
	u32 timeout_usec = (hdev->pldm || !hdev->pdev) ?
			GAUDI2_PLDM_MMU_TIMEOUT_USEC : MMU_CONFIG_TIMEOUT_USEC;
	u32 busy;
	int rc;

	/* Latch the hop0 PA for this ASID, then kick the STLB state machine */
	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);

	/* Wait for the busy bit to clear */
	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_BUSY_OFFSET,
		busy,
		!(busy & 0x80000000),
		1000,
		timeout_usec);

	if (rc)
		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);

	return rc;
}
5675 
/*
 * gaudi2_mmu_send_invalidate_cache_cmd() - Kick an MMU cache invalidation.
 * @hdev: pointer to habanalabs device structure.
 * @stlb_base: base address of the target STLB block.
 * @start_offset: register offset that starts the invalidation.
 * @inv_start_val: value to write in order to start the invalidation.
 * @flags: MMU_OP_* flags controlling which caches are touched.
 */
static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
					u32 start_offset, u32 inv_start_val,
					u32 flags)
{
	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
	if (flags & MMU_OP_CLEAR_MEMCACHE)
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);

	/* trigger the low cache invalidation unless explicitly skipped */
	if (!(flags & MMU_OP_SKIP_LOW_CACHE_INV))
		WREG32(stlb_base + start_offset, inv_start_val);
}
5689 
/*
 * gaudi2_mmu_invalidate_cache_status_poll() - Wait for a previously
 * triggered MMU cache invalidation to complete.
 * @hdev: pointer to habanalabs device structure.
 * @stlb_base: base address of the STLB block that was invalidated.
 * @inv_params: the same parameters used to trigger the invalidation.
 *
 * Return: 0 on success, non-zero on poll timeout.
 */
static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 timeout_usec = hdev->pldm ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
	u32 status, poll_off;
	int rc;

	/* PMMU mem line cache is polled only for range invalidation */
	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
		rc = hl_poll_timeout(
			hdev,
			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
			status,
			status & 0x1,
			1000,
			timeout_usec);
		if (rc)
			return rc;

		/* The status bit is not self-clearing - reset it by hand */
		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
	}

	/* Lower cache does not work with cache lines, hence we can skip its
	 * invalidation upon map and invalidate only upon unmap
	 */
	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
		return 0;

	poll_off = inv_params->range_invalidation ?
			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;

	rc = hl_poll_timeout(
		hdev,
		stlb_base + poll_off,
		status,
		!(status & 0x1),
		1000,
		timeout_usec);

	return rc;
}
5735 
gaudi2_is_hmmu_enabled(struct hl_device * hdev,int dcore_id,int hmmu_id)5736 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5737 {
5738 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5739 	u32 hw_cap;
5740 
5741 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5742 
5743 	if (gaudi2->hw_cap_initialized & hw_cap)
5744 		return true;
5745 
5746 	return false;
5747 }
5748 
5749 /* this function shall be called only for HMMUs for which capability bit is set */
get_hmmu_stlb_base(int dcore_id,int hmmu_id)5750 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5751 {
5752 	u32 offset;
5753 
5754 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5755 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5756 }
5757 
/*
 * gaudi2_mmu_invalidate_cache_trigger() - Start an MMU cache invalidation.
 * @hdev: pointer to habanalabs device structure.
 * @stlb_base: base address of the target STLB block.
 * @inv_params: parameters selecting full vs. range invalidation.
 *
 * For range invalidation the VA range registers are programmed first,
 * then the invalidation command is issued.
 */
static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
						struct gaudi2_cache_invld_params *inv_params)
{
	u32 start_offset = STLB_INV_ALL_START_OFFSET;

	if (inv_params->range_invalidation) {
		/* The HW excludes the programmed start address itself from the
		 * invalidated range by design, so write one byte below the
		 * first address we actually want covered.
		 */
		u64 range_start = inv_params->start_va - 1;

		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;

		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
				range_start >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
				range_start >> MMU_RANGE_INV_VA_MSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);

		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
	}

	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
						inv_params->inv_start_val, inv_params->flags);
}
5792 
/* Trigger a cache invalidation on one specific HMMU. */
static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	gaudi2_mmu_invalidate_cache_trigger(hdev, get_hmmu_stlb_base(dcore_id, hmmu_id),
						inv_params);
}
5801 
/* Poll one specific HMMU for cache-invalidation completion. */
static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	return gaudi2_mmu_invalidate_cache_status_poll(hdev,
					get_hmmu_stlb_base(dcore_id, hmmu_id), inv_params);
}
5810 
gaudi2_hmmus_invalidate_cache(struct hl_device * hdev,struct gaudi2_cache_invld_params * inv_params)5811 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5812 						struct gaudi2_cache_invld_params *inv_params)
5813 {
5814 	int dcore_id, hmmu_id;
5815 
5816 	/* first send all invalidation commands */
5817 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5818 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5819 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5820 				continue;
5821 
5822 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5823 		}
5824 	}
5825 
5826 	/* next, poll all invalidations status */
5827 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5828 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5829 			int rc;
5830 
5831 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5832 				continue;
5833 
5834 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5835 										inv_params);
5836 			if (rc)
5837 				return rc;
5838 		}
5839 	}
5840 
5841 	return 0;
5842 }
5843 
/*
 * gaudi2_mmu_invalidate_cache() - Full MMU cache invalidation entry point.
 * @hdev: pointer to habanalabs device structure.
 * @is_hard: unused here; kept for the common ASIC interface.
 * @flags: MMU_OP_* flags selecting PMMU (host mappings) or HMMUs.
 *
 * Return: 0 on success or when skipped due to a pending hard reset,
 *         non-zero on poll failure.
 */
static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_cache_invld_params invld_params;

	/* Nothing to invalidate - the device is about to be hard reset */
	if (hdev->reset_info.hard_reset_pending)
		return 0;

	invld_params.range_invalidation = false;
	invld_params.inv_start_val = 1;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* Host memory mappings go through the PMMU */
		invld_params.flags = flags;
		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		return gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
	}

	if (flags & MMU_OP_PHYS_PACK) {
		/* Device memory mappings go through the HMMUs */
		invld_params.flags = 0;
		return gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return 0;
}
5868 
/*
 * gaudi2_mmu_invalidate_cache_range() - Invalidate MMU caches for a VA range.
 * @hdev: pointer to habanalabs device structure.
 * @is_hard: unused here; kept for the common ASIC interface.
 * @flags: MMU_OP_* flags selecting PMMU (host mappings) or HMMUs.
 * @asid: ASID whose mappings are invalidated.
 * @va: start virtual address of the range.
 * @size: size of the range in bytes.
 *
 * Return: 0 on success or when skipped due to a pending hard reset,
 *         non-zero on poll failure.
 */
static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
				u32 flags, u32 asid, u64 va, u64 size)
{
	struct gaudi2_cache_invld_params invld_params = {0};
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 start_va, end_va;
	u32 inv_start_val;
	int rc = 0;

	if (hdev->reset_info.hard_reset_pending)
		return 0;

	/* Range invalidation start value: enable + filter by ASID */
	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
			asid << MMU_RANGE_INV_ASID_SHIFT);
	start_va = va;
	end_va = start_va + size;

	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support zero address we will
		 * do full invalidation in this case
		 */
		if (start_va) {
			invld_params.range_invalidation = true;
			invld_params.start_va = start_va;
			invld_params.end_va = end_va;
			invld_params.inv_start_val = inv_start_val;
			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
		} else {
			invld_params.range_invalidation = false;
			invld_params.inv_start_val = 1;
			invld_params.flags = flags;
		}

		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
										&invld_params);
		if (rc)
			return rc;

	} else if (flags & MMU_OP_PHYS_PACK) {
		/* HMMUs see scrambled addresses, so scramble the range bounds */
		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
		invld_params.inv_start_val = inv_start_val;
		invld_params.flags = flags;
		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
	}

	return rc;
}
5920 
/*
 * gaudi2_mmu_update_hop0_addr() - Program hop0 addresses for all ASIDs.
 * @hdev: pointer to habanalabs device structure.
 * @stlb_base: base address of the target STLB block.
 * @host_resident_pgt: true when the page tables live in host memory.
 *
 * Return: 0 on success, the first failing ASID's error code otherwise.
 */
static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
									bool host_resident_pgt)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 asid, asid_cnt = prop->max_asid;
	u64 hop0_addr;
	int rc;

	/* it takes too much time to init all of the ASIDs on palladium */
	if (hdev->pldm)
		asid_cnt = min((u32) 8, asid_cnt);

	for (asid = 0 ; asid < asid_cnt ; asid++) {
		hop0_addr = host_resident_pgt ?
				hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr :
				prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);

		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
		if (rc) {
			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
			return rc;
		}
	}

	return 0;
}
5948 
/*
 * gaudi2_mmu_init_common() - Common MMU bring-up sequence (PMMU and HMMU).
 * @hdev: pointer to habanalabs device structure.
 * @mmu_base: base address of the MMU block.
 * @stlb_base: base address of the matching STLB block.
 * @host_resident_pgt: true when the page tables live in host memory.
 *
 * Sequence: start an invalidate-all, wait for SRAM init, program all
 * hop0 addresses, clear MMU bypass, wait for the invalidate-all to
 * finish, and only then enable the MMU.
 *
 * Return: 0 on success, non-zero on hop0 programming failure or poll
 *         timeout of the final invalidation.
 */
static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
								bool host_resident_pgt)
{
	u32 status, timeout_usec;
	int rc;

	/* simulation/palladium platforms are much slower than real HW */
	if (hdev->pldm || !hdev->pdev)
		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;

	/* kick an invalidate-all; its completion is polled further below */
	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);

	/* wait for the STLB SRAM init to complete before touching hop0 */
	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_SRAM_INIT_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	/* timeout here is reported but not fatal - continue the bring-up */
	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");

	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
	if (rc)
		return rc;

	/* take the MMU out of bypass mode */
	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);

	/* wait for the invalidate-all started above to finish */
	rc = hl_poll_timeout(
		hdev,
		stlb_base + STLB_INV_ALL_START_OFFSET,
		status,
		!status,
		1000,
		timeout_usec);

	if (rc)
		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");

	/* enable the MMU last, after the caches are known-clean */
	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);

	return rc;
}
5994 
/*
 * gaudi2_pci_mmu_init() - Initialize the PCI (host-facing) MMU.
 * @hdev: pointer to habanalabs device structure.
 *
 * Configures the PMMU hop-walk parameters, the optional 64K host page
 * mode, and the SPI/SEI interrupt mask, then runs the common MMU
 * bring-up. No-op if the PMMU capability bit is already set.
 *
 * Return: 0 on success, non-zero on common-init failure.
 */
static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base, stlb_base;
	int rc;

	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
		return 0;

	mmu_base = mmPMMU_HBW_MMU_BASE;
	stlb_base = mmPMMU_HBW_STLB_BASE;

	/* Hop-walk configuration: first hop 0, last/follower hop 5,
	 * first lookup hop 5 for small pages and 4 for large pages.
	 */
	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);

	if (PAGE_SIZE == SZ_64K) {
		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
			FIELD_PREP(
				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
				1),
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
	}

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;

	return 0;
}
6045 
/*
 * gaudi2_dcore_hmmu_init() - Initialize a single HBM MMU (HMMU).
 * @hdev: pointer to habanalabs device structure.
 * @dcore_id: dcore index of the HMMU.
 * @hmmu_id: HMMU index within the dcore.
 *
 * Configures the HMMU's page sizes, hop-walk parameters, large-page-only
 * mode and SPI/SEI mask, then runs the common MMU bring-up.
 *
 * Return: 0 on success (or when the HMMU is already initialized /
 *         binned out), non-zero on common-init failure.
 */
static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
				int hmmu_id)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 offset, mmu_base, stlb_base, hw_cap;
	u8 dmmu_seq;
	int rc;

	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;

	/*
	 * return if DMMU is already initialized or if it's not out of
	 * isolation (due to cluster binning)
	 */
	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
		return 0;

	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;

	/* hop4 page size: 64MB */
	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);

	/* Hop-walk configuration: first hop 0, everything else hop 3 */
	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);

	/* HMMU serves only large pages */
	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);

	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);

	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
	if (rc)
		return rc;

	gaudi2->hw_cap_initialized |= hw_cap;

	return 0;
}
6097 
gaudi2_hbm_mmu_init(struct hl_device * hdev)6098 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
6099 {
6100 	int rc, dcore_id, hmmu_id;
6101 
6102 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
6103 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
6104 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
6105 			if (rc)
6106 				return rc;
6107 		}
6108 
6109 	return 0;
6110 }
6111 
/*
 * gaudi2_mmu_init() - initialize the device MMUs: the PCI (PMMU) first and
 * then the per-DCORE HBM MMUs.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc;

	rc = gaudi2_pci_mmu_init(hdev);
	if (!rc)
		rc = gaudi2_hbm_mmu_init(hdev);

	return rc;
}
6126 
/*
 * gaudi2_hw_init() - main H/W initialization flow: bring up the device CPU,
 * its queues, the MMUs and all compute/DMA engines, then enable coresight,
 * timestamping and MSI-X. The order of the steps below is significant.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int gaudi2_hw_init(struct hl_device *hdev)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int rc;

	/* Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi2_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* Scrambler and KDMA must be configured before the CPU queues are used */
	gaudi2_init_scrambler_hbm(hdev);
	gaudi2_init_kdma(hdev);

	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
		return rc;
	}

	rc = gaudi2->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	rc = gaudi2_mmu_init(hdev);
	if (rc)
		return rc;

	/* Bring up all engines; these helpers configure registers and QMANs */
	gaudi2_init_pdma(hdev);
	gaudi2_init_edma(hdev);
	gaudi2_init_sm(hdev);
	gaudi2_init_tpc(hdev);
	gaudi2_init_mme(hdev);
	gaudi2_init_rotator(hdev);
	gaudi2_init_dec(hdev);
	gaudi2_enable_timestamp(hdev);

	rc = gaudi2_coresight_init(hdev);
	if (rc)
		goto disable_queues;

	rc = gaudi2_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_queues:
	/* Roll back the engine QMANs enabled above */
	gaudi2_disable_dma_qmans(hdev);
	gaudi2_disable_mme_qmans(hdev);
	gaudi2_disable_tpc_qmans(hdev);
	gaudi2_disable_rot_qmans(hdev);
	gaudi2_disable_nic_qmans(hdev);

	gaudi2_disable_timestamp(hdev);

	return rc;
}
6216 
6217 /**
6218  * gaudi2_send_hard_reset_cmd - common function to handle reset
6219  *
6220  * @hdev: pointer to the habanalabs device structure
6221  *
6222  * This function handles the various possible scenarios for reset.
6223  * It considers if reset is handled by driver\FW and what FW components are loaded
6224  */
static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	bool heartbeat_reset, preboot_only, cpu_initialized = false;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 cpu_boot_status;

	/* Determine what FW stack is currently running and why we are resetting */
	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);

	/*
	 * Handle corner case where failure was at cpu management app load,
	 * and driver didn't detect any failure while loading the FW,
	 * then at such scenario driver will send only HALT_MACHINE
	 * and no one will respond to this request since FW already back to preboot
	 * and it cannot handle such cmd.
	 * In this case next time the management app loads it'll check on events register
	 * which will still have the halt indication, and will reboot the device.
	 * The solution is to let preboot clear all relevant registers before next boot
	 * once driver send COMMS_RST_DEV.
	 */
	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);

	/* CPU is considered initialized only if the driver saw it come up AND
	 * the boot status register still reports SRAM_AVAIL
	 */
	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
		cpu_initialized = true;

	/*
	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
	 * 1. FW reset: FW initiate the reset sequence
	 * 2. driver reset: FW will start HALT sequence (the preparations for the
	 *                  reset but not the reset itself as it is not implemented
	 *                  on their part) and LKD will wait to let FW complete the
	 *                  sequence before issuing the reset
	 */
	if (!preboot_only && cpu_initialized) {
		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);

		/* Give the FW time to process the halt request */
		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
	}

	/*
	 * When working with preboot (without Linux/Boot fit) we can
	 * communicate only using the COMMS commands to issue halt/reset.
	 *
	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
	 * attempt to revive the card in the small chance that the f/w has
	 * experienced a watchdog event, which caused it to return back to preboot.
	 * In that case, triggering reset through GIC won't help. We need to
	 * trigger the reset as if Linux wasn't loaded.
	 *
	 * We do it only if the reset cause was HB, because that would be the
	 * indication of such an event.
	 *
	 * In case watchdog hasn't expired but we still got HB, then this won't
	 * do any damage.
	 */

	if (heartbeat_reset || preboot_only || !cpu_initialized) {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}
}
6291 
6292 /**
6293  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6294  *
6295  * @hdev: pointer to the habanalabs device structure
6296  *
6297  * This function executes hard reset based on if driver/FW should do the reset
6298  */
static void gaudi2_execute_hard_reset(struct hl_device *hdev)
{
	/* If the FW owns hard reset, only signal it and let it do the work */
	if (hdev->asic_prop.hard_reset_done_by_fw) {
		gaudi2_send_hard_reset_cmd(hdev);
		return;
	}

	/* Set device to handle FLR by H/W as we will put the device
	 * CPU to halt mode
	 */
	WREG32(mmPCIE_AUX_FLR_CTRL,
			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	gaudi2_send_hard_reset_cmd(hdev);

	/* Driver-initiated reset: trigger the full SW reset ourselves */
	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
}
6316 
6317 /**
6318  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6319  *
6320  * @hdev: pointer to the habanalabs device structure
6321  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6322  * @poll_timeout_us: time to wait for response from f/w.
6323  *
6324  * This function executes soft reset based on if driver/FW should do the reset
6325  */
static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
						u32 poll_timeout_us)
{
	/* NOTE(review): poll_timeout_us is currently unused here; it is kept
	 * for interface compatibility with the caller - confirm before removal.
	 */
	if (!driver_performs_reset)
		return hl_fw_send_soft_reset(hdev);

	/* Block access to engines, QMANs and SM during reset, these
	 * RRs will be reconfigured after soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
	 */
	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);

	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);

	/* Trigger the soft reset from the driver side */
	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
	return 0;
}
6346 
/*
 * gaudi2_poll_btm_indication() - wait for the BTM "done" indication after
 * reset by polling mmPSOC_GLOBAL_CONF_BTM_FSM until it reads zero.
 * Logs an error (but does not fail) if the last polling round times out.
 */
static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
{
	int i, rc = 0;
	u32 reg_val;

	/* We poll the BTM done indication multiple times after reset due to
	 * a HW errata 'GAUDI2_0300'
	 */
	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_BTM_FSM,
			reg_val,
			reg_val == 0,
			1000,
			poll_timeout_us);

	/* Only the result of the last polling round is checked */
	if (rc)
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
}
6367 
/*
 * gaudi2_hw_fini() - execute hard or soft reset of the device and clear the
 * relevant capability masks so the next init reconfigures what was reset.
 *
 * @hdev: pointer to the habanalabs device structure
 * @hard_reset: true for hard reset, false for soft (compute) reset
 * @fw_reset: true when the FW already performed the reset itself, in which
 *            case the driver skips triggering another one
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 poll_timeout_us, reset_sleep_ms;
	bool driver_performs_reset = false;
	int rc;

	/* PLDM (emulation platform) needs much longer timeouts */
	if (hdev->pldm) {
		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
	} else {
		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
	}

	if (fw_reset)
		goto skip_reset;

	gaudi2_reset_arcs(hdev);

	if (hard_reset) {
		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
		gaudi2_execute_hard_reset(hdev);
	} else {
		/*
		 * As we have to support also work with preboot only (which does not supports
		 * soft reset) we have to make sure that security is disabled before letting driver
		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
		 * secured device with preboot only.
		 */
		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
							!hdev->asic_prop.fw_security_enabled);
		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
		if (rc)
			return rc;
	}

skip_reset:
	if (driver_performs_reset || hard_reset) {
		/*
		 * Instead of waiting for BTM indication we should wait for preboot ready:
		 * Consider the below scenario:
		 * 1. FW update is being triggered
		 *        - setting the dirty bit
		 * 2. hard reset will be triggered due to the dirty bit
		 * 3. FW initiates the reset:
		 *        - dirty bit cleared
		 *        - BTM indication cleared
		 *        - preboot ready indication cleared
		 * 4. during hard reset:
		 *        - BTM indication will be set
		 *        - BIST test performed and another reset triggered
		 * 5. only after this reset the preboot will set the preboot ready
		 *
		 * when polling on BTM indication alone we can lose sync with FW while trying to
		 * communicate with FW that is during reset.
		 * to overcome this we will always wait to preboot ready indication
		 */

		/* without this sleep reset will not work */
		msleep(reset_sleep_ms);

		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
			hl_fw_wait_preboot_ready(hdev);
		else
			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
	}

	/* ASIC-specific data may not exist yet (very early reset) */
	if (!gaudi2)
		return 0;

	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);

	/*
	 * Clear NIC capability mask in order for driver to re-configure
	 * NIC QMANs. NIC ports will not be re-configured during soft
	 * reset as we call gaudi2_nic_init only during hard reset
	 */
	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);

	if (hard_reset) {
		/* Hard reset wipes everything - clear all capability bits */
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);

		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
	} else {
		/* Soft reset keeps CPU/MMU state - clear only compute-engine caps */
		gaudi2->hw_cap_initialized &=
			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
			HW_CAP_ROT_MASK);
	}
	return 0;
}
6467 
/*
 * gaudi2_suspend() - notify the device CPU to stop accessing the host over
 * PCI, in preparation for host-side power management.
 */
static int gaudi2_suspend(struct hl_device *hdev)
{
	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
6472 
/*
 * gaudi2_resume() - re-initialize the PCI iATU after resume, since its
 * configuration is not retained across suspend.
 */
static int gaudi2_resume(struct hl_device *hdev)
{
	return gaudi2_init_iatu(hdev);
}
6477 
/*
 * gaudi2_mmap() - map a coherent DMA buffer to userspace.
 * Uses dma_mmap_coherent() where the kernel provides it, otherwise falls
 * back to remap_pfn_range() on the buffer's physical pages.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
		void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	/* Forbid expanding/copying/dumping the mapping - it is device-backed */
	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

#ifdef _HAS_DMA_MMAP_COHERENT

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

#else

	rc = remap_pfn_range(vma, vma->vm_start,
				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

#endif

	return rc;
}
6504 
/*
 * gaudi2_is_queue_enabled() - check whether a given H/W queue is enabled by
 * testing the capability bit of the engine that owns it in the relevant
 * capability mask (generic, TPC or NIC).
 *
 * Returns true if the queue's engine is initialized, false otherwise
 * (including for unknown queue IDs).
 */
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 hw_cap_mask = 0;
	u64 hw_tpc_cap_bit = 0;
	u64 hw_nic_cap_bit = 0;
	u64 hw_test_cap_bit = 0;

	/* Each engine owns 4 consecutive queue IDs, hence the ">> 2" below
	 * when translating a queue ID to an engine index
	 */
	switch (hw_queue_id) {
	case GAUDI2_QUEUE_ID_PDMA_0_0:
	case GAUDI2_QUEUE_ID_PDMA_0_1:
	case GAUDI2_QUEUE_ID_PDMA_1_0:
		hw_cap_mask = HW_CAP_PDMA_MASK;
		break;
	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
		break;
	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT;
		break;

	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
		break;

	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
		break;

	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_tpc_cap_bit)
			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
		break;

	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
		break;

	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);

		/* special case where cap bit refers to the first queue id */
		if (!hw_nic_cap_bit)
			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
		break;

	case GAUDI2_QUEUE_ID_CPU_PQ:
		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);

	default:
		return false;
	}

	/* TPC and NIC engines use dedicated capability masks */
	if (hw_tpc_cap_bit)
		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));

	if (hw_nic_cap_bit)
		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));

	if (hw_test_cap_bit)
		hw_cap_mask = BIT_ULL(hw_test_cap_bit);

	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
}
6610 
gaudi2_is_arc_enabled(struct hl_device * hdev,u64 arc_id)6611 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6612 {
6613 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6614 
6615 	switch (arc_id) {
6616 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6617 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6618 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6619 
6620 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6621 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6622 
6623 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6624 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6625 
6626 	default:
6627 		return false;
6628 	}
6629 }
6630 
/*
 * gaudi2_clr_arc_id_cap() - mark the ARC identified by @arc_id as inactive
 * by clearing its bit in the matching active-ARC mask. Unknown IDs are
 * silently ignored.
 */
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if ((arc_id >= CPU_ID_SCHED_ARC0 && arc_id <= CPU_ID_SCHED_ARC5) ||
			(arc_id >= CPU_ID_MME_QMAN_ARC0 && arc_id <= CPU_ID_ROT_QMAN_ARC1))
		gaudi2->active_hw_arc &= ~BIT_ULL(arc_id);
	else if (arc_id >= CPU_ID_TPC_QMAN_ARC0 && arc_id <= CPU_ID_TPC_QMAN_ARC24)
		gaudi2->active_tpc_arc &= ~BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
	else if (arc_id >= CPU_ID_NIC_QMAN_ARC0 && arc_id <= CPU_ID_NIC_QMAN_ARC23)
		gaudi2->active_nic_arc &= ~BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
}
6653 
/*
 * gaudi2_set_arc_id_cap() - mark the ARC identified by @arc_id as active by
 * setting its bit in the matching active-ARC mask. Unknown IDs are silently
 * ignored.
 */
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if ((arc_id >= CPU_ID_SCHED_ARC0 && arc_id <= CPU_ID_SCHED_ARC5) ||
			(arc_id >= CPU_ID_MME_QMAN_ARC0 && arc_id <= CPU_ID_ROT_QMAN_ARC1))
		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
	else if (arc_id >= CPU_ID_TPC_QMAN_ARC0 && arc_id <= CPU_ID_TPC_QMAN_ARC24)
		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
	else if (arc_id >= CPU_ID_NIC_QMAN_ARC0 && arc_id <= CPU_ID_NIC_QMAN_ARC23)
		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
}
6676 
/*
 * gaudi2_ring_doorbell() - update the producer index of a H/W queue in the
 * relevant doorbell register, and for the CPU queue also raise the PI-update
 * interrupt towards the device CPU.
 */
static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 pq_offset, reg_base, db_reg_offset, db_value;

	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
		/*
		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
		 * number.
		 */
		pq_offset = (hw_queue_id & 0x3) * 4;
		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
	} else {
		/* The CPU queue has its own dedicated doorbell register */
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();
		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
	}
}
6707 
/*
 * gaudi2_pqe_write() - copy a 16-byte buffer descriptor into a PQ entry.
 * The PQs reside in host memory, so a plain copy of the two 64-bit words
 * is sufficient.
 */
static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	__le64 *src = (__le64 *) bd;
	int i;

	for (i = 0 ; i < 2 ; i++)
		pqe[i] = src[i];
}
6716 
/* Thin wrapper over the kernel DMA API for coherent allocations */
static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
				dma_addr_t *dma_handle, gfp_t flags)
{
	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
}
6722 
/* Counterpart of gaudi2_dma_alloc_coherent() */
static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
				void *cpu_addr, dma_addr_t dma_handle)
{
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
}
6728 
/*
 * gaudi2_send_cpu_message() - send a message to the device CPU over its PQ.
 * When the CPU queue is not operational, the call succeeds with *result set
 * to 0 (if requested). A zero timeout selects the default timeout.
 */
static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
				u32 timeout, u64 *result)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 tmo_usec = timeout ? timeout : GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;

	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len,
					tmo_usec, result);
}
6745 
/*
 * gaudi2_dma_pool_zalloc() - allocate a zeroed buffer from the device DMA
 * pool. Returns NULL for requests larger than the pool's fixed block size.
 */
static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
				gfp_t mem_flags, dma_addr_t *dma_handle)
{
	return (size > GAUDI2_DMA_POOL_BLK_SIZE) ? NULL :
			dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
}
6754 
/* Counterpart of gaudi2_dma_pool_zalloc() */
static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
{
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
}
6759 
/* Allocate from the pool that is accessible to the device CPU */
static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
						dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
6765 
/* Counterpart of gaudi2_cpu_accessible_dma_pool_alloc() */
static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
6770 
gaudi2_validate_cb_address(struct hl_device * hdev,struct hl_cs_parser * parser)6771 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6772 {
6773 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6774 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6775 
6776 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6777 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6778 		return -EINVAL;
6779 	}
6780 
6781 	/* Just check if CB address is valid */
6782 
6783 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6784 					parser->user_cb_size,
6785 					asic_prop->sram_user_base_address,
6786 					asic_prop->sram_end_address))
6787 		return 0;
6788 
6789 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6790 					parser->user_cb_size,
6791 					asic_prop->dram_user_base_address,
6792 					asic_prop->dram_end_address))
6793 		return 0;
6794 
6795 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6796 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6797 						parser->user_cb_size,
6798 						asic_prop->dmmu.start_addr,
6799 						asic_prop->dmmu.end_addr))
6800 		return 0;
6801 
6802 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6803 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6804 					parser->user_cb_size,
6805 					asic_prop->pmmu.start_addr,
6806 					asic_prop->pmmu.end_addr) ||
6807 			hl_mem_area_inside_range(
6808 					(u64) (uintptr_t) parser->user_cb,
6809 					parser->user_cb_size,
6810 					asic_prop->pmmu_huge.start_addr,
6811 					asic_prop->pmmu_huge.end_addr))
6812 			return 0;
6813 
6814 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6815 		if (!hdev->pdev)
6816 			return 0;
6817 
6818 		if (!device_iommu_mapped(&hdev->pdev->dev))
6819 			return 0;
6820 	}
6821 
6822 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6823 		parser->user_cb, parser->user_cb_size);
6824 
6825 	return -EFAULT;
6826 }
6827 
/*
 * gaudi2_cs_parser() - parse a command submission job.
 * User-allocated CBs only undergo an address-range check; kernel-allocated
 * CBs require the PMMU to be initialized and need no further validation here.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (!parser->is_kernel_allocated_cb)
		return gaudi2_validate_cb_address(hdev, parser);

	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
		return -EINVAL;
	}

	return 0;
}
6842 
gaudi2_send_heartbeat(struct hl_device * hdev)6843 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6844 {
6845 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6846 
6847 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6848 		return 0;
6849 
6850 	return hl_fw_send_heartbeat(hdev);
6851 }
6852 
6853 /* This is an internal helper function, used to update the KDMA mmu props.
6854  * Should be called with a proper kdma lock.
6855  */
static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
					   bool mmu_bypass, u32 asid)
{
	u32 rw_asid, rw_mmu_bp;

	/* Same ASID is applied to both the read and write paths */
	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);

	/* MMU bypass is likewise set/cleared for both read and write */
	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);

	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
}
6870 
/*
 * gaudi2_arm_cq_monitor() - reset a sync object and arm a sync manager
 * monitor on it, so that when the SOB reaches @sync_value the monitor
 * writes @mon_payload to completion queue @cq_id.
 */
static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
						u32 mon_payload, u32 sync_value)
{
	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
	u8 mask;

	/* SOB and monitor registers are each 4 bytes apart */
	sob_offset = sob_id * 4;
	mon_offset = mon_id * 4;

	/* Reset the SOB value */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);

	/* Configure this address with CQ_ID 0 because CQ_EN is set */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);

	/* Configure this address with CS index because CQ_EN is set */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);

	/* Monitors watch groups of 8 SOBs; the mask selects one SOB in the group */
	sync_group_id = sob_id / 8;
	mask = ~(1 << (sob_id & 0x7));
	mode = 1; /* comparison mode is "equal to" */

	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
}
6899 
/* Submit a copy/memset job to the KDMA engine and busy-wait for its completion */
/*
 * gaudi2_send_job_to_kdma() - run one KDMA job and wait for it to finish.
 * @hdev: habanalabs device structure.
 * @src_addr: source device address (or memset source when @is_memset).
 * @dst_addr: destination device address.
 * @size: transfer size in bytes.
 * @is_memset: true to perform a memset instead of a linear copy.
 *
 * Completion is detected via a reserved SOB/monitor/CQ triplet: the KDMA
 * write-completion increments the SOB, the armed monitor then pushes an
 * entry into the reserved completion queue, which is polled here.
 *
 * Return: 0 on success, negative errno if polling timed out.
 */
static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
					u64 src_addr, u64 dst_addr,
					u32 size, bool is_memset)
{
	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
	struct hl_cq_entry *cq_base;
	struct hl_cq *cq;
	u64 comp_addr;
	int rc;

	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);

	/* KDMA write-completion target: the reserved SOB (device address) */
	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));

	/* Completion write increments the SOB by 1 */
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);

	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);

	/* Linear DMA with write-completion enabled */
	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);

	if (is_memset)
		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);

	/* Writing the commit register kicks off the transfer */
	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);

	/* Wait for completion */
	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
	cq_base = cq->kernel_address;
	polling_addr = (u32 *)&cq_base[cq->ci];

	if (hdev->pldm)
		/* for each 1MB 20 second of timeout */
		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
	else
		timeout = KDMA_TIMEOUT_USEC;

	/* Polling */
	rc = hl_poll_timeout_memory(
			hdev,
			polling_addr,
			status,
			(status == 1),
			1000,
			timeout,
			true);

	/* Clear the CQ entry so the slot can be reused on the next job */
	*polling_addr = 0;

	if (rc) {
		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
		/* Halt the KDMA engine so it doesn't write after we gave up */
		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
		return rc;
	}

	cq->ci = hl_cq_inc_ptr(cq->ci);

	return 0;
}
6971 
/* Fill a device memory range with @val, one 32-bit LBW write at a time */
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
{
	u32 written = 0;

	while (written < size) {
		WREG32(addr + written, val);
		written += sizeof(u32);
	}
}
6979 
/*
 * Switch a QMAN in or out of test mode: test mode relaxes protection and
 * disables the PQC, normal mode restores trusted protection and re-enables it.
 */
static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
{
	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
	u32 prot_val, pqc_val;

	if (enable) {
		prot_val = QMAN_MAKE_TRUSTED_TEST_MODE;
		pqc_val = 0;
	} else {
		prot_val = QMAN_MAKE_TRUSTED;
		pqc_val = 1 << PDMA0_QM_PQC_CFG_EN_SHIFT;
	}

	WREG32(reg_base + QM_GLBL_PROT_OFFSET, prot_val);
	WREG32(reg_base + QM_PQC_CFG_OFFSET, pqc_val);
}
6992 
/* Map a H/W queue ID to the user SOB reserved for its queue test */
static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
{
	u32 queue_index = hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;

	return hdev->asic_prop.first_available_user_sob[0] + queue_index;
}
6998 
/* Zero the sync object used by the given queue's test */
static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
{
	u32 sob_id = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id);

	/* SOB registers are 4 bytes apart */
	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4), 0);
}
7007 
/*
 * gaudi2_test_queue_send_msg_short() - push a MSG_SHORT test packet to a queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: queue to test.
 * @sob_val: value the packet writes to the queue's reserved SOB.
 * @msg_info: pre-allocated packet buffer (kernel VA + DMA address).
 *
 * The packet writes @sob_val to the test SOB; completion is later checked by
 * gaudi2_test_queue_wait_completion().
 *
 * Return: 0 on success, negative errno if the packet could not be queued.
 */
static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
					    struct gaudi2_queues_test_info *msg_info)
{
	u32 sob_offset =  gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
	u32 tmp, sob_base = 1;
	struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
	size_t pkt_size = sizeof(struct packet_msg_short);
	int rc;

	/* Build the ctl word: MSG_SHORT opcode, engine+message barriers,
	 * SOB base and the byte offset of the target SOB
	 */
	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);

	msg_short_pkt->value = cpu_to_le32(sob_val);
	msg_short_pkt->ctl = cpu_to_le32(tmp);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
	if (rc)
		dev_err(hdev->dev,
			"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);

	return rc;
}
7033 
/*
 * gaudi2_test_queue_wait_completion() - poll the test SOB until it reaches
 *                                       the expected value.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: queue whose test completion is awaited.
 * @sob_val: expected SOB value (the one written by the test packet).
 *
 * Return: 0 on success, -EIO if the SOB did not reach @sob_val in time.
 */
static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
{
	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	u32 timeout_usec, tmp;
	int rc;

	/* Simulation platform is much slower, give it a longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;

	rc = hl_poll_timeout(
			hdev,
			sob_addr,
			tmp,
			(tmp == sob_val),
			1000,
			timeout_usec);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
			hw_queue_id, tmp);
		rc = -EIO;
	}

	return rc;
}
7062 
gaudi2_test_cpu_queue(struct hl_device * hdev)7063 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
7064 {
7065 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7066 
7067 	/*
7068 	 * check capability here as send_cpu_message() won't update the result
7069 	 * value if no capability
7070 	 */
7071 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7072 		return 0;
7073 
7074 	return hl_fw_test_cpu_queue(hdev);
7075 }
7076 
gaudi2_test_queues(struct hl_device * hdev)7077 static int gaudi2_test_queues(struct hl_device *hdev)
7078 {
7079 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7080 	struct gaudi2_queues_test_info *msg_info;
7081 	u32 sob_val = 0x5a5a;
7082 	int i, rc;
7083 
7084 	/* send test message on all enabled Qs */
7085 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7086 		if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7087 			continue;
7088 
7089 		msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
7090 		gaudi2_qman_set_test_mode(hdev, i, true);
7091 		gaudi2_test_queue_clear(hdev, i);
7092 		rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
7093 		if (rc)
7094 			goto done;
7095 	}
7096 
7097 	rc = gaudi2_test_cpu_queue(hdev);
7098 	if (rc)
7099 		goto done;
7100 
7101 	/* verify that all messages were processed */
7102 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7103 		if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7104 			continue;
7105 
7106 		rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
7107 		if (rc)
7108 			/* chip is not usable, no need for cleanups, just bail-out with error */
7109 			goto done;
7110 
7111 		gaudi2_test_queue_clear(hdev, i);
7112 		gaudi2_qman_set_test_mode(hdev, i, false);
7113 	}
7114 
7115 done:
7116 	return rc;
7117 }
7118 
gaudi2_compute_reset_late_init(struct hl_device * hdev)7119 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
7120 {
7121 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7122 	size_t irq_arr_size;
7123 	int rc;
7124 
7125 	gaudi2_init_arcs(hdev);
7126 
7127 	rc = gaudi2_scrub_arcs_dccm(hdev);
7128 	if (rc) {
7129 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7130 		return rc;
7131 	}
7132 
7133 	gaudi2_init_security(hdev);
7134 
7135 	/* Unmask all IRQs since some could have been received during the soft reset */
7136 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7137 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7138 }
7139 
/*
 * gaudi2_get_edma_idle_status() - report idle state of all enabled EDMA engines.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Return: true only if every enabled EDMA engine (QMAN and DMA core) is idle.
 */
static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
	bool is_idle = true, is_eng_idle;
	int engine_idx, i, j;
	u64 offset;

	if (e)
		hl_engine_data_sprintf(e,
			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
			"----  ----  -------  ------------  -------------  -------------\n");

	for (i = 0; i < NUM_OF_DCORES; i++) {
		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
			int seq = i * NUM_OF_EDMA_PER_DCORE + j;

			/* Skip EDMA engines that are binned out */
			if (!(prop->edma_enabled_mask & BIT(seq)))
				continue;

			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;

			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);

			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);

			/* Idle requires both the QMAN and the DMA core to be quiescent */
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(engine_idx, mask);

			if (e)
				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
		}
	}

	return is_idle;
}
7189 
/*
 * gaudi2_get_pdma_idle_status() - report idle state of both PDMA engines.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Return: true only if every PDMA engine (QMAN and DMA core) is idle.
 */
static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
	bool is_idle = true, is_eng_idle;
	int engine_idx, i;
	u64 offset;

	if (e)
		hl_engine_data_sprintf(e,
					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
					"----  -------  ------------  -------------  -------------\n");

	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
		offset = i * PDMA_OFFSET;
		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);

		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);

		/* Idle requires both the QMAN and the DMA core to be quiescent */
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
	}

	return is_idle;
}
7229 
/*
 * gaudi2_get_nic_idle_status() - report idle state of all enabled NIC QMANs.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Return: true only if every enabled NIC engine's QMAN is idle.
 */
static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
	bool is_idle = true, is_eng_idle;
	int engine_idx, i;
	u64 offset = 0;

	/* NIC, twelve macros in Full chip */
	if (e && hdev->nic_ports_mask)
		hl_engine_data_sprintf(e,
					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
					"---  -------  ------------  ----------\n");

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		/* Each NIC macro hosts two engines (QM0/QM1); advance the base
		 * offset per macro and add the intra-macro QM offset for odd i.
		 * Offset is advanced before the enable check so disabled ports
		 * don't break the running offset.
		 */
		if (!(i & 1))
			offset = i / 2 * NIC_OFFSET;
		else
			offset += NIC_QM_OFFSET;

		if (!(hdev->nic_ports_mask & BIT(i)))
			continue;

		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;


		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
	}

	return is_idle;
}
7275 
/*
 * gaudi2_get_mme_idle_status() - report idle state of all MME engines.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Return: true only if every MME (QMAN and MME core) is idle.
 */
static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
	bool is_idle = true, is_eng_idle;
	int engine_idx, i;
	u64 offset;

	if (e)
		hl_engine_data_sprintf(e,
					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
					"---  ----  -------  ------------  ---------------\n");
	/* MME, one per Dcore */
	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
		offset = i * DCORE_OFFSET;

		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		/* In addition to the QMAN, the MME core itself must be idle */
		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
		is_idle &= is_eng_idle;

		/* The "Stub" column is always reported as "N" */
		if (e)
			hl_engine_data_sprintf(e, mme_fmt, i, "N",
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0,
				mme_arch_sts);

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);
	}

	return is_idle;
}
7318 
/*
 * gaudi2_is_tpc_engine_idle() - per-TPC callback for the TPC iterator.
 * @hdev: habanalabs device structure.
 * @dcore: dcore index of this TPC.
 * @inst: TPC instance within the dcore.
 * @offset: register offset of this TPC instance.
 * @ctx: iterator context whose ->data is a struct gaudi2_tpc_idle_data.
 *
 * ANDs this engine's idle state into ctx->data->is_idle, sets its bit in the
 * busy-mask when not idle, and appends a table row to the engines-data buffer.
 */
static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
					struct iterate_module_ctx *ctx)
{
	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
	bool is_eng_idle;
	int engine_idx;

	/* The last TPC of DCORE0 has a dedicated (out-of-pattern) engine ID */
	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
	else
		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;

	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);

	/* Idle requires both the QMAN and the TPC core to be quiescent */
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
						IS_TPC_IDLE(tpc_cfg_sts);
	*(idle_data->is_idle) &= is_eng_idle;

	if (idle_data->mask && !is_eng_idle)
		set_bit(engine_idx, idle_data->mask);

	if (idle_data->e)
		hl_engine_data_sprintf(idle_data->e,
					idle_data->tpc_fmt, dcore, inst,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
}
7351 
/*
 * gaudi2_get_tpc_idle_status() - report idle state of all enabled TPC engines.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Delegates the per-engine work to gaudi2_is_tpc_engine_idle() via the
 * common TPC iterator.
 *
 * Return: true only if every enabled TPC engine is idle.
 */
static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	unsigned long *mask = (unsigned long *) mask_arr;
	bool is_idle = true;

	struct gaudi2_tpc_idle_data tpc_idle_data = {
		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
		.e = e,
		.mask = mask,
		.is_idle = &is_idle,
	};
	struct iterate_module_ctx tpc_iter = {
		.fn = &gaudi2_is_tpc_engine_idle,
		.data = &tpc_idle_data,
	};

	if (e && prop->tpc_enabled_mask)
		hl_engine_data_sprintf(e,
			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
			"----  ---  -------  ------------  ----------  ------\n");

	gaudi2_iterate_tpcs(hdev, &tpc_iter);

	/* The callback accumulated the result into is_idle via the pointer */
	return *tpc_idle_data.is_idle;
}
7379 
/*
 * gaudi2_get_decoder_idle_status() - report idle state of all enabled decoders.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Covers both the per-dcore decoders and the two shared PCIe decoders.
 *
 * Return: true only if every enabled decoder is idle.
 */
static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	unsigned long *mask = (unsigned long *) mask_arr;
	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
	bool is_idle = true, is_eng_idle;
	u32 dec_swreg15, dec_enabled_bit;
	int engine_idx, i, j;
	u64 offset;

	/* Decoders, two each Dcore and two shared PCIe decoders */
	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
		hl_engine_data_sprintf(e,
			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
			"----  ---  -------  ---------------\n");

	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
			/* Skip decoders that are binned out */
			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
				continue;

			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;

			/* Idle state is reflected in the decoder's SWREG15 */
			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(engine_idx, mask);

			if (e)
				hl_engine_data_sprintf(e, dec_fmt, i, j,
							is_eng_idle ? "Y" : "N", dec_swreg15);
		}
	}

	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
		hl_engine_data_sprintf(e,
			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
			"--------  -------  ---------------\n");

	/* Check shared(PCIe) decoders */
	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
		dec_enabled_bit = PCIE_DEC_SHIFT + i;
		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
			continue;

		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
		offset = i * DCORE_DEC_OFFSET;
		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		if (e)
			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
						is_eng_idle ? "Y" : "N", dec_swreg15);
	}

	return is_idle;
}
7448 
/*
 * gaudi2_get_rotator_idle_status() - report idle state of all rotator engines.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines are set.
 * @mask_len: size of @mask_arr in u64 entries.
 * @e: optional engines-data buffer that receives a printable status table.
 *
 * Return: true only if every rotator's QMAN is idle.
 */
static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
	unsigned long *mask = (unsigned long *) mask_arr;
	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
	bool is_idle = true, is_eng_idle;
	int engine_idx, i;
	u64 offset;

	if (e)
		hl_engine_data_sprintf(e,
			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
			"----  ---  -------  ------------  ------------  ----------\n");

	for (i = 0 ; i < NUM_OF_ROT ; i++) {
		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;

		offset = i * ROT_OFFSET;

		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);

		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(engine_idx, mask);

		/* Rotators are not dcore-bound, hence the constant 0 CORE column */
		if (e)
			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
	}

	return is_idle;
}
7486 
gaudi2_is_device_idle(struct hl_device * hdev,u64 * mask_arr,u8 mask_len,struct engines_data * e)7487 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7488 					struct engines_data *e)
7489 {
7490 	bool is_idle = true;
7491 
7492 	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7493 	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7494 	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7495 	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7496 	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7497 	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7498 	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7499 
7500 	return is_idle;
7501 }
7502 
/* Acquire the device-wide H/W queues spinlock (sparse lock annotation below) */
static void gaudi2_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi2->hw_queues_lock)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	spin_lock(&gaudi2->hw_queues_lock);
}
7510 
/* Release the device-wide H/W queues spinlock (sparse lock annotation below) */
static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi2->hw_queues_lock)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	spin_unlock(&gaudi2->hw_queues_lock);
}
7518 
/* Return the PCI device ID of the underlying PCI device */
static u32 gaudi2_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
7523 
/*
 * Fetch EEPROM data through the firmware. The request goes over the CPU
 * queue, so when that queue isn't up there is nothing to do and 0 is returned.
 */
static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return hl_fw_get_eeprom_data(hdev, data, max_size);

	return 0;
}
7533 
/* Publish the new event-queue consumer index to the EQ read-offset register */
static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
7538 
/*
 * Return a pointer to the events statistics array — the aggregate (lifetime)
 * counters or the current ones — and report its size through @size.
 */
static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	void *stat_arr;

	if (aggregate) {
		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
		stat_arr = gaudi2->events_stat_aggregate;
	} else {
		*size = (u32) sizeof(gaudi2->events_stat);
		stat_arr = gaudi2->events_stat;
	}

	return stat_arr;
}
7551 
/*
 * gaudi2_mmu_vdec_dcore_prepare() - program ASID/MMU-bypass for one dcore VDEC.
 * @hdev: habanalabs device structure.
 * @dcore_id: dcore the decoder belongs to.
 * @dcore_vdec_id: decoder instance within the dcore.
 * @rw_asid: combined RD/WR ASID value for the AXUSER registers.
 * @rw_mmu_bp: combined RD/WR MMU-bypass value for the AXUSER registers.
 *
 * Covers the decoder itself plus all its MSI-X interfaces (abnormal, L2C,
 * normal, VCD).
 */
static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
{
	/* Stride between VDEC bridge-control blocks, scaled by instance/dcore */
	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
			dcore_vdec_id + DCORE_OFFSET * dcore_id;

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);

	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
}
7573 
/*
 * gaudi2_mmu_dcore_prepare() - program the given ASID into all MMU-related
 * registers of one dcore's engines.
 * @hdev: habanalabs device structure.
 * @dcore_id: dcore to configure.
 * @asid: address-space ID to program for both reads and writes.
 *
 * Covers the dcore's EDMAs, sync manager, MME (control, SBTE and WB ports,
 * QMAN) and enabled decoders. MMU bypass is disabled (0) for every engine
 * that is configured.
 */
static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
{
	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 dcore_offset = dcore_id * DCORE_OFFSET;
	u32 vdec_id, i, ports_offset, reg_val;
	u8 edma_seq_base;

	/* EDMA - configure each of the dcore's two EDMAs if not binned out */
	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
	}

	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
	}

	/* Sync Mngr */
	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
	/*
	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
	 * for any access type
	 */
	if (dcore_id > 0) {
		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
	}

	/* MME control */
	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);

	/* MME SBTE (read) ports */
	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
		ports_offset = i * DCORE_MME_SBTE_OFFSET;
		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
				dcore_offset + ports_offset, 0);
		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
				dcore_offset + ports_offset, rw_asid);
	}

	/* MME write-back ports */
	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
		ports_offset = i * DCORE_MME_WB_OFFSET;
		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
				dcore_offset + ports_offset, 0);
		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
				dcore_offset + ports_offset, rw_asid);
	}

	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);

	/*
	 * Decoders
	 */
	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
	}
}
7642 
/*
 * gudi2_mmu_vdec_shared_prepare() - program ASID/MMU-bypass for one shared
 * (PCIe) VDEC.
 * @hdev: habanalabs device structure.
 * @shared_vdec_id: index of the shared decoder.
 * @rw_asid: combined RD/WR ASID value for the AXUSER registers.
 * @rw_mmu_bp: combined RD/WR MMU-bypass value for the AXUSER registers.
 *
 * Same register set as the per-dcore variant, but based at the PCIe VDEC
 * bridge-control blocks.
 */
static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
{
	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);

	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
}
7663 
/*
 * gudi2_mmu_arc_farm_arc_dup_eng_prepare() - program ASID/MMU-bypass for one
 * ARC-farm duplication engine.
 * @hdev: habanalabs device structure.
 * @arc_farm_id: index of the ARC-farm ARC.
 * @rw_asid: combined RD/WR ASID value for the AXUSER registers.
 * @rw_mmu_bp: combined RD/WR MMU-bypass value for the AXUSER registers.
 */
static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
							u32 rw_asid, u32 rw_mmu_bp)
{
	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;

	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
}
7672 
/*
 * gaudi2_arc_mmu_prepare() - enable the MMU and program the ASID for all
 * relevant regions of one ARC core.
 * @hdev: habanalabs device structure.
 * @cpu_id: ARC CPU whose region-config registers are programmed.
 * @asid: address-space ID to program.
 *
 * The same config word (MMU bypass off + ASID) is written to every region in
 * the table below, in order.
 */
static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
{
	static const u32 arc_regions[] = {
		ARC_REGION3_GENERAL,
		ARC_REGION4_HBM0_FW,
		ARC_REGION5_HBM1_GC_DATA,
		ARC_REGION6_HBM2_GC_DATA,
		ARC_REGION7_HBM3_GC_DATA,
		ARC_REGION9_PCIE,
		ARC_REGION10_GENERAL,
		ARC_REGION11_GENERAL,
		ARC_REGION12_GENERAL,
		ARC_REGION13_GENERAL,
		ARC_REGION14_GENERAL,
	};
	u32 reg_base = gaudi2_arc_blocks_bases[cpu_id];
	u32 cfg_val;
	int i;

	/* MMU bypass disabled, requested ASID for every region */
	cfg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
	cfg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);

	for (i = 0 ; i < ARRAY_SIZE(arc_regions) ; i++)
		WREG32(reg_base + ARC_REGION_CFG_OFFSET(arc_regions[i]), cfg_val);
}
7716 
/*
 * Configure the MMU/ASID settings of all active ARC cores, either by
 * delegating to the CPU-CP f/w or by programming them directly.
 * Returns 0 on success, error code from the f/w request otherwise.
 */
static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
{
	int i;

	/* When the boot CPU f/w is in use, it owns the engine-core ASID setup */
	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);

	/* Scheduler ARCs */
	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
		gaudi2_arc_mmu_prepare(hdev, i, asid);

	/* Engine ARCs - one per QMAN, stepping over the 4 streams of each queue */
	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
		if (gaudi2_is_queue_enabled(hdev, i))
			gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
	}

	return 0;
}
7736 
/*
 * gaudi2_mmu_shared_prepare() - configure the MMU ASID for all blocks that are
 * shared between dcores (PDMA, rotators, shared decoders, ARC farm).
 *
 * Improvement over the original: the redundant 'rc' temporary and the
 * duplicated "if (rc) return rc; return 0;" tail are collapsed into a direct
 * tail return. Register write order is unchanged.
 *
 * Return: 0 on success, error code from the ARC preparation otherwise.
 */
static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 rw_asid, offset;
	int i;

	/* Same ASID for both read and write transactions */
	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);

	/* PDMA0/PDMA1 - QMAN and core, with MMU bypass disabled */
	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);

	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);

	/* ROT */
	for (i = 0 ; i < NUM_OF_ROT ; i++) {
		offset = i * ROT_OFFSET;
		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
	}

	/* Shared Decoders are the last bits in the decoders mask */
	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);

	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);

	/* arc farm arc dup eng */
	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);

	return gaudi2_arc_mmu_prepare_all(hdev, asid);
}
7783 
/* Per-TPC callback for gaudi2_iterate_tpcs(): disable MMU bypass and set the
 * R/W ASID on both the TPC CFG and the TPC QMAN AXUSER interfaces.
 */
static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
					struct iterate_module_ctx *ctx)
{
	struct gaudi2_tpc_mmu_data *mmu_cfg = ctx->data;

	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_cfg->rw_asid);
	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_cfg->rw_asid);
}
7794 
7795 /* zero the MMUBP and set the ASID */
static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	struct gaudi2_tpc_mmu_data tpc_mmu_data;
	struct iterate_module_ctx tpc_iter = {
		.fn = &gaudi2_tpc_mmu_prepare,
		.data = &tpc_mmu_data,
	};
	int rc, dcore_id;

	/* The ASID must fit the HMMU STLB ASID field */
	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return -EINVAL;
	}

	/* Nothing to program while the MMU is not initialized */
	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
		return 0;

	/* Shared (inter-dcore) blocks first */
	rc = gaudi2_mmu_shared_prepare(hdev, asid);
	if (rc)
		return rc;

	/* configure DCORE MMUs */
	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
	gaudi2_iterate_tpcs(hdev, &tpc_iter);

	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
		gaudi2_mmu_dcore_prepare(hdev, dcore_id, asid);

	return 0;
}
7827 
is_info_event(u32 event)7828 static inline bool is_info_event(u32 event)
7829 {
7830 	switch (event) {
7831 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7832 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7833 	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
7834 
7835 	/* return in case of NIC status event - these events are received periodically and not as
7836 	 * an indication to an error.
7837 	 */
7838 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7839 	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
7840 		return true;
7841 	default:
7842 		return false;
7843 	}
7844 }
7845 
/*
 * Print an event message prefixed with the event's name from the IRQ map
 * table, optionally rate-limited. Uses the kernel's %pV to forward the
 * caller's format string and arguments.
 */
static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
			bool ratelimited, const char *fmt, ...)
{
	const char *event_name;
	struct va_format vaf;
	va_list args;

	/* Fall back to a placeholder for events missing from the map table */
	event_name = gaudi2_irq_map_table[event_type].valid ?
			gaudi2_irq_map_table[event_type].name : "N/A Event";

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (ratelimited)
		dev_err_ratelimited(hdev->dev, "%s: %pV\n", event_name, &vaf);
	else
		dev_err(hdev->dev, "%s: %pV\n", event_name, &vaf);

	va_end(args);
}
7867 
gaudi2_handle_ecc_event(struct hl_device * hdev,u16 event_type,struct hl_eq_ecc_data * ecc_data)7868 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7869 		struct hl_eq_ecc_data *ecc_data)
7870 {
7871 	u64 ecc_address = 0, ecc_syndrome = 0;
7872 	u8 memory_wrapper_idx = 0;
7873 	bool has_block_id = false;
7874 	u16 block_id;
7875 
7876 	if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0)
7877 		has_block_id = true;
7878 
7879 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7880 	ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
7881 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7882 
7883 	if (has_block_id) {
7884 		block_id = le16_to_cpu(ecc_data->block_id);
7885 		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7886 			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
7887 			ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
7888 			ecc_data->is_critical);
7889 	} else {
7890 		gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7891 			"ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
7892 			ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
7893 	}
7894 
7895 	return !!ecc_data->is_critical;
7896 }
7897 
/*
 * handle_lower_qman_data_on_err() - dump lower-QM CQ/CP state after an error
 * and capture the first undefined-opcode occurrence for later reporting.
 * @hdev: habanalabs device structure.
 * @qman_base: base address of the failing QMAN block.
 * @engine_id: engine id recorded in the captured undefined-opcode info.
 */
static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
{
	struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
	u64 cq_ptr, cp_current_inst;
	u32 lo, hi, cq_size, cp_sts;
	bool is_arc_cq;

	/* Determine which CQ the lower CP was serving when the error occurred */
	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
	is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */

	/* Read the pointer/size status of the active CQ (registers differ per CQ type) */
	if (is_arc_cq) {
		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
		cq_ptr = ((u64) hi) << 32 | lo;
		cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
	} else {
		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
		cq_ptr = ((u64) hi) << 32 | lo;
		cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
	}

	/* The instruction the lower CP was executing at the time of the error */
	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
	cp_current_inst = ((u64) hi) << 32 | lo;

	dev_info(hdev->dev,
		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
		is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);

	/* Capture only the first occurrence - write_enable is cleared here so
	 * subsequent errors do not overwrite the captured info.
	 */
	if (undef_opcode->write_enable) {
		memset(undef_opcode, 0, sizeof(*undef_opcode));
		undef_opcode->timestamp = ktime_get();
		undef_opcode->cq_addr = cq_ptr;
		undef_opcode->cq_size = cq_size;
		undef_opcode->engine_id = engine_id;
		undef_opcode->stream_id = QMAN_STREAMS;
		undef_opcode->write_enable = 0;
	}
}
7938 
/*
 * gaudi2_handle_qman_err_generic() - decode a QMAN's per-stream and arbiter
 * error-cause registers and report each set cause bit.
 * @hdev: habanalabs device structure.
 * @event_type: event id used for the report prefix.
 * @qman_base: base address of the QMAN block.
 * @qid_base: first queue id of this QMAN, used to resolve the engine id.
 * @event_mask: out-mask; UNDEFINED_OPCODE is set when detected on lower QM.
 *
 * Return: total number of error causes found.
 */
static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
						u64 qman_base, u32 qid_base, u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	/* Register offsets are identical across QMAN instances, so compute them
	 * relative to the TPC0 QMAN block.
	 */
	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);

	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		/* Index QMAN_STREAMS (one past the last stream) is the lower QM,
		 * which has its own cause-string table and cause count.
		 */
		if (i == QMAN_STREAMS) {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
			num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
		} else {
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
		}

		for (j = 0 ; j < num_error_causes ; j++)
			if (glbl_sts_val & BIT(j)) {
				gaudi2_print_event(hdev, event_type, true,
					"%s. err cause: %s", reg_desc,
					i == QMAN_STREAMS ?
					gaudi2_lower_qman_error_cause[j] :
					gaudi2_qman_error_cause[j]);
				error_count++;
			}

		/* Check for undefined opcode error in lower QM */
		if ((i == QMAN_STREAMS) &&
				(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
			handle_lower_qman_data_on_err(hdev, qman_base,
							gaudi2_queue_id_to_engine_id[qid_base]);
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}
	}

	/* Arbiter errors are reported through a separate cause register */
	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		goto out;

	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			gaudi2_print_event(hdev, event_type, true,
				"ARB_ERR. err cause: %s",
				gaudi2_qman_arb_error_cause[j]);
			error_count++;
		}
	}

out:
	return error_count;
}
8000 
/*
 * Report a shared range-register HBW RAZWI: read the captured address and
 * initiator coordinates (write or read capture registers, per @is_write),
 * record the RAZWI and print a rate-limited error.
 */
static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
			enum gaudi2_engine_id id, u64 *event_mask)
{
	u32 razwi_hi, razwi_lo, razwi_xy;
	u64 razwi_addr;
	u16 eng_id = id;
	u8 rd_wr_flag;

	if (is_write) {
		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
		rd_wr_flag = HL_RAZWI_WRITE;
	} else {
		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
		rd_wr_flag = HL_RAZWI_READ;
	}

	/* Combine the two capture halves once and reuse for record and print */
	razwi_addr = (u64)razwi_hi << 32 | razwi_lo;

	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_HBW, event_mask);

	dev_err_ratelimited(hdev->dev,
		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
		name, is_write ? "WR" : "RD", razwi_addr, razwi_xy);
}
8028 
/*
 * Report a shared range-register LBW RAZWI: the capture register holds an
 * offset that is added on top of CFG_BASE to form the full address.
 */
static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
			enum gaudi2_engine_id id, u64 *event_mask)
{
	u64 razwi_addr = CFG_BASE;
	u32 addr_reg, xy_reg, razwi_xy;
	u16 eng_id = id;
	u8 rd_wr_flag;

	/* Select the AW (write) or AR (read) capture registers */
	if (is_write) {
		addr_reg = RR_SHRD_LBW_AW_RAZWI;
		xy_reg = RR_SHRD_LBW_AW_RAZWI_XY;
		rd_wr_flag = HL_RAZWI_WRITE;
	} else {
		addr_reg = RR_SHRD_LBW_AR_RAZWI;
		xy_reg = RR_SHRD_LBW_AR_RAZWI_XY;
		rd_wr_flag = HL_RAZWI_READ;
	}

	razwi_addr += RREG32(rtr_mstr_if_base_addr + addr_reg);
	razwi_xy = RREG32(rtr_mstr_if_base_addr + xy_reg);

	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
	dev_err_ratelimited(hdev->dev,
				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
						razwi_xy);
}
8054 
/*
 * gaudi2_razwi_calc_engine_id() - map a RAZWI initiator (module type + index)
 * to its gaudi2 engine id.
 * @hdev: habanalabs device structure (unused, kept for interface uniformity).
 * @module: the initiator module type.
 * @module_idx: instance index within the module type.
 *
 * Fix: removed a stray empty statement (lone ';') that was left in the
 * RAZWI_DEC case.
 *
 * Return: the engine id, or GAUDI2_ENGINE_ID_SIZE for an unknown module.
 */
static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
						enum razwi_event_sources module, u8 module_idx)
{
	switch (module) {
	case RAZWI_TPC:
		/* The last TPC index maps to the dedicated DCORE0 TPC6 */
		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
				(module_idx % NUM_OF_TPC_PER_DCORE) +
				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));

	case RAZWI_MME:
		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
			(module_idx * ENGINE_ID_DCORE_OFFSET));

	case RAZWI_EDMA:
		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
			(module_idx % NUM_OF_EDMA_PER_DCORE));

	case RAZWI_PDMA:
		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);

	case RAZWI_NIC:
		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));

	case RAZWI_DEC:
		/* Indices 8 and 9 are the PCIE (shared) decoders */
		if (module_idx == 8)
			return GAUDI2_PCIE_ENGINE_ID_DEC_0;

		if (module_idx == 9)
			return GAUDI2_PCIE_ENGINE_ID_DEC_1;

		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
				(module_idx % NUM_OF_DEC_PER_DCORE) +
				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));

	case RAZWI_ROT:
		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;

	case RAZWI_ARC_FARM:
		return GAUDI2_ENGINE_ID_ARC_FARM;

	default:
		return GAUDI2_ENGINE_ID_SIZE;
	}
}
8101 
/*
 * This function handles RR (Range register) hit events
 * raised by initiators, not PSOC RAZWI.
 */
/*
 * gaudi2_ack_module_razwi_event_handler() - check, report and clear RAZWI
 * captures for a given initiator module instance.
 * @hdev: habanalabs device structure.
 * @module: the initiator module type.
 * @module_idx: instance index within the module type.
 * @module_sub_idx: sub-initiator (used for MME WAP/SBTE/read/write selection).
 * @event_mask: out-mask forwarded to the RAZWI reporting helpers (may be NULL).
 *
 * Resolves the HBW/LBW router master-interface base addresses for the
 * initiator, reads the four "RAZWI_HAPPENED" capture indications and, for each
 * one that is set, prints/records the capture and writes it back to clear it.
 */
static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
				enum razwi_event_sources module, u8 module_idx,
				u8 module_sub_idx, u64 *event_mask)
{
	bool via_sft = false;
	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
	char initiator_name[64];

	/* Resolve the router ids (or, for EDMA, the SFT base addresses directly) */
	switch (module) {
	case RAZWI_TPC:
		sprintf(initiator_name, "TPC_%u", module_idx);
		/* A binned TPC is substituted by the spare DCORE0 TPC6 */
		if (hdev->tpc_binning) {
			binned_idx = __ffs(hdev->tpc_binning);
			if (binned_idx == module_idx)
				module_idx = TPC_ID_DCORE0_TPC6;
		}

		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
		lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
		break;
	case RAZWI_MME:
		sprintf(initiator_name, "MME_%u", module_idx);
		/* Each MME sub-initiator may sit behind a different router */
		switch (module_sub_idx) {
		case MME_WAP0:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
			break;
		case MME_WAP1:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
			break;
		case MME_WRITE:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
			break;
		case MME_READ:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
			break;
		case MME_SBTE0:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
			break;
		case MME_SBTE1:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
			break;
		case MME_SBTE2:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
			break;
		case MME_SBTE3:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
			break;
		case MME_SBTE4:
			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
			break;
		default:
			return;
		}
		lbw_rtr_id = hbw_rtr_id;
		break;
	case RAZWI_EDMA:
		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
		/* SFT has separate MSTR_IF for LBW, only there we can
		 * read the LBW razwi related registers
		 */
		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
								dcore_id * SFT_DCORE_OFFSET;
		via_sft = true;
		sprintf(initiator_name, "EDMA_%u", module_idx);
		break;
	case RAZWI_PDMA:
		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
		sprintf(initiator_name, "PDMA_%u", module_idx);
		break;
	case RAZWI_NIC:
		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
		sprintf(initiator_name, "NIC_%u", module_idx);
		break;
	case RAZWI_DEC:
		sprintf(initiator_name, "DEC_%u", module_idx);
		/* A binned decoder is substituted by the spare PCIE VDEC1 */
		if (hdev->decoder_binning) {
			binned_idx = __ffs(hdev->decoder_binning);
			if (binned_idx == module_idx)
				module_idx = DEC_ID_PCIE_VDEC1;
		}
		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
		break;
	case RAZWI_ROT:
		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
		sprintf(initiator_name, "ROT_%u", module_idx);
		break;
	case RAZWI_ARC_FARM:
		lbw_rtr_id = DCORE1_RTR5;
		hbw_rtr_id = DCORE1_RTR7;
		sprintf(initiator_name, "ARC_FARM_%u", module_idx);
		break;
	default:
		return;
	}

	/* Find router mstr_if register base */
	if (!via_sft) {
		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
				dcore_id * DCORE_OFFSET +
				dcore_rtr_id * DCORE_RTR_OFFSET +
				RTR_MSTR_IF_OFFSET;
		/* LBW base is derived from the HBW base by the (signed) router delta */
		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
	}

	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);

	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
	if (hbw_shrd_aw) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
						initiator_name, eng_id, event_mask);

		/* Clear event indication */
		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
	}

	if (hbw_shrd_ar) {
		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
						initiator_name, eng_id, event_mask);

		/* Clear event indication */
		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
	}

	if (lbw_shrd_aw) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
						initiator_name, eng_id, event_mask);

		/* Clear event indication */
		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
	}

	if (lbw_shrd_ar) {
		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
						initiator_name, eng_id, event_mask);

		/* Clear event indication */
		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
	}
}
8260 
gaudi2_check_if_razwi_happened(struct hl_device * hdev)8261 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8262 {
8263 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8264 	u8 mod_idx, sub_mod;
8265 
8266 	/* check all TPCs */
8267 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8268 		if (prop->tpc_enabled_mask & BIT(mod_idx))
8269 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8270 	}
8271 
8272 	/* check all MMEs */
8273 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8274 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8275 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8276 									sub_mod, NULL);
8277 
8278 	/* check all EDMAs */
8279 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8280 		if (prop->edma_enabled_mask & BIT(mod_idx))
8281 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8282 
8283 	/* check all PDMAs */
8284 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8285 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8286 
8287 	/* check all NICs */
8288 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8289 		if (hdev->nic_ports_mask & BIT(mod_idx))
8290 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8291 								NULL);
8292 
8293 	/* check all DECs */
8294 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8295 		if (prop->decoder_enabled_mask & BIT(mod_idx))
8296 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8297 
8298 	/* check all ROTs */
8299 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8300 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8301 }
8302 
/*
 * Collect all engines in @razwi_info whose AXUSER XY coordinates match
 * @axuser_xy. Fills @base/@eng_id per match and builds a human-readable
 * "A or B or C" name string in @eng_name.
 * Returns the number of matching engines.
 */
static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
						u32 axuser_xy, u32 *base, u16 *eng_id,
						char *eng_name)
{

	int i, num_of_eng = 0;
	u16 str_size = 0;

	for (i = 0 ; i < array_size ; i++) {
		if (axuser_xy != razwi_info[i].axuser_xy)
			continue;

		eng_id[num_of_eng] = razwi_info[i].eng_id;
		base[num_of_eng] = razwi_info[i].rtr_ctrl;

		/* First match is printed bare, subsequent ones get an " or " prefix */
		str_size += scnprintf(eng_name + str_size, PSOC_RAZWI_ENG_STR_SIZE - str_size,
					num_of_eng ? " or %s" : "%s", razwi_info[i].eng_name);
		num_of_eng++;
	}

	return num_of_eng;
}
8330 
/*
 * gaudi2_handle_psoc_razwi_happened() - decode a PSOC RAZWI indication,
 * locate the initiating engine(s) by AXUSER XY coordinates and report every
 * capture (HBW/LBW x AW/AR) found in the matching router(s).
 * @hdev: habanalabs device structure.
 * @razwi_reg: raw value of the PSOC RAZWI mask-info register.
 * @event_mask: out-mask forwarded to the RAZWI reporting helper.
 *
 * Return: true if at least one valid capture was found and reported.
 */
static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
						u64 *event_mask)
{
	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
	bool razwi_happened = false;
	u64 addr;
	int i;

	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
							axuser_xy, base, eng_id, eng_name_str);

	/* If no match for XY coordinates, try to find it in MME razwi table */
	if (!num_of_eng) {
		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
								ARRAY_SIZE(mme_razwi_info),
								axuser_xy, base, eng_id,
								eng_name_str);
	}

	/* Check each candidate router for the four possible capture types */
	for  (i = 0 ; i < num_of_eng ; i++) {
		/* HBW write capture */
		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
			addr = ((u64)addr_hi << 32) + addr_lo;
			if (addr) {
				dev_err(hdev->dev,
					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
					eng_name_str, addr);
				hl_handle_razwi(hdev, addr, &eng_id[0],
					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
				razwi_happened = true;
			}
		}

		/* HBW read capture */
		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
			addr = ((u64)addr_hi << 32) + addr_lo;
			if (addr) {
				dev_err(hdev->dev,
					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
					eng_name_str, addr);
				hl_handle_razwi(hdev, addr, &eng_id[0],
					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
				razwi_happened = true;
			}
		}

		/* LBW write capture (32-bit address) */
		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
			if (addr_lo) {
				dev_err(hdev->dev,
					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
					eng_name_str, addr_lo);
				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
				razwi_happened = true;
			}
		}

		/* LBW read capture (32-bit address) */
		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
			if (addr_lo) {
				dev_err(hdev->dev,
						"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
						eng_name_str, addr_lo);
				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
				razwi_happened = true;
			}
		}
		/* In common case the loop will break, when there is only one engine id, or
		 * several engines with the same router. The exceptional case is with psoc razwi
		 * from EDMA, where it's possible to get axuser id which fits 2 routers (2
		 * interfaces of sft router). In this case, maybe the first router won't hold info
		 * and we will need to iterate on the other router.
		 */
		if (razwi_happened)
			break;
	}

	return razwi_happened;
}
8418 
8419 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
/* PSOC RAZWI interrupt occurs only when trying to access a bad address */
static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
{
	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
	/* The driver reads/clears the interrupt only on pldm or when the
	 * Linux f/w component does not own interrupt handling.
	 */
	bool handle_intr = hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX);

	if (handle_intr) {
		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
		if (!razwi_intr)
			return 0;
	}

	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);

	dev_err_ratelimited(hdev->dev,
		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));

	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
		error_count++;
	else
		dev_err_ratelimited(hdev->dev,
				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
				razwi_mask_info);

	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
	if (handle_intr)
		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);

	return error_count;
}
8453 
/*
 * Decode a single QMAN's SEI status register, report every set cause bit and
 * clear the reported bits. Returns the number of causes found.
 */
static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
{
	u32 sts_val, sts_clr_val = 0, error_count = 0, i;

	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);

	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
		if (!(sts_val & BIT(i)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_qm_sei_error_cause[i]);
		sts_clr_val |= BIT(i);
		error_count++;
	}

	/* Write-back the reported bits to clear them */
	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);

	return error_count;
}
8473 
/*
 * gaudi2_handle_qm_sei_err() - resolve the QMAN block for an AXI-error-response
 * event, decode its SEI status and optionally perform extended checks.
 * @hdev: habanalabs device structure.
 * @event_type: the AXI error response event id.
 * @extended_err_check: also check for RAZWI and global errors.
 * @event_mask: out-mask forwarded to the RAZWI handler.
 *
 * Return: number of error causes found (0 for unrecognized event types).
 */
static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
					bool extended_err_check, u64 *event_mask)
{
	enum razwi_event_sources module;
	u32 error_count = 0;
	u64 qman_base;
	u8 index;

	/* Map the event to its QMAN base address and RAZWI module type */
	switch (event_type) {
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		qman_base = mmDCORE0_TPC0_QM_BASE +
				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
		module = RAZWI_TPC;
		break;
	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		/* The 25th TPC (binning spare) lives in DCORE0 as TPC6 */
		qman_base = mmDCORE0_TPC6_QM_BASE;
		module = RAZWI_TPC;
		break;
	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
		/* MME events are evenly spaced; derive the index from the stride */
		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
		module = RAZWI_MME;
		break;
	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
		module = RAZWI_PDMA;
		break;
	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
		module = RAZWI_ROT;
		break;
	default:
		return 0;
	}

	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);

	/* There is a single event per NIC macro, so should check its both QMAN blocks */
	/* NOTE(review): the switch above has no NIC case, so qman_base is never
	 * set for this range and this branch looks unreachable here - confirm
	 * whether NIC AXI error events are routed through this function.
	 */
	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
		error_count += _gaudi2_handle_qm_sei_err(hdev,
					qman_base + NIC_QM_OFFSET, event_type);

	if (extended_err_check) {
		/* check if RAZWI happened */
		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
		hl_check_for_glbl_errors(hdev);
	}

	return error_count;
}
8536 
/*
 * gaudi2_handle_qman_err() - handle a QMAN error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the underlying handlers.
 *
 * Maps the event id to the affected engine's QMAN base address and its first
 * queue id, then delegates to the generic QMAN error handler. EDMA QMANs
 * additionally get their QM SEI status handled here, since EDMA has no
 * dedicated AXI error response event. Returns the number of error causes
 * found.
 */
static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u32 qid_base, error_count = 0;
	u64 qman_base;
	u8 index = 0;	/* EDMA module index, consumed by the RAZWI check below */

	switch (event_type) {
	/* TPC QMANs: 6 per dcore, 4 streams (QMAN_STREAMS) per TPC queue */
	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
		index = event_type - GAUDI2_EVENT_TPC0_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
		index = event_type - GAUDI2_EVENT_TPC6_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
		index = event_type - GAUDI2_EVENT_TPC12_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
		index = event_type - GAUDI2_EVENT_TPC18_QM;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
		break;
	/* TPC24 is the extra TPC, located at dcore0 slot 6 */
	case GAUDI2_EVENT_TPC24_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
		qman_base = mmDCORE0_TPC6_QM_BASE;
		break;
	/* MME QMANs: one per dcore */
	case GAUDI2_EVENT_MME0_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
		qman_base = mmDCORE0_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME1_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
		qman_base = mmDCORE1_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME2_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
		qman_base = mmDCORE2_MME_QM_BASE;
		break;
	case GAUDI2_EVENT_MME3_QM:
		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
		qman_base = mmDCORE3_MME_QM_BASE;
		break;
	/* EDMA QMANs: two per dcore; index is the global EDMA module number */
	case GAUDI2_EVENT_HDMA0_QM:
		index = 0;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
		qman_base = mmDCORE0_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA1_QM:
		index = 1;
		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
		qman_base = mmDCORE0_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA2_QM:
		index = 2;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
		qman_base = mmDCORE1_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA3_QM:
		index = 3;
		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
		qman_base = mmDCORE1_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA4_QM:
		index = 4;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
		qman_base = mmDCORE2_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA5_QM:
		index = 5;
		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
		qman_base = mmDCORE2_EDMA1_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA6_QM:
		index = 6;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
		qman_base = mmDCORE3_EDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_HDMA7_QM:
		index = 7;
		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
		qman_base = mmDCORE3_EDMA1_QM_BASE;
		break;
	/* PDMA QMANs */
	case GAUDI2_EVENT_PDMA0_QM:
		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
		qman_base = mmPDMA0_QM_BASE;
		break;
	case GAUDI2_EVENT_PDMA1_QM:
		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
		qman_base = mmPDMA1_QM_BASE;
		break;
	/* Rotator QMANs */
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
		qman_base = mmROT0_QM_BASE;
		break;
	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
		qman_base = mmROT1_QM_BASE;
		break;
	default:
		/* not a QMAN error event this handler knows how to map */
		return 0;
	}

	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
								qid_base, event_mask);

	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
	}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
8657 
/*
 * gaudi2_handle_arc_farm_sei_err() - handle an ARC-farm SEI interrupt.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Scans the SEI interrupt status of every ARC-farm ARC, prints and clears
 * each asserted cause, then checks for an accompanying RAZWI and for global
 * errors. Returns the number of causes found.
 */
static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u32 farm, bit, status, clr_mask, num_errors = 0;

	for (farm = 0 ; farm < NUM_OF_ARC_FARMS_ARC ; farm++) {
		status = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
				(farm * ARC_FARM_OFFSET));
		clr_mask = 0;

		for (bit = 0 ; bit < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; bit++) {
			if (!(status & BIT(bit)))
				continue;

			gaudi2_print_event(hdev, event_type, true,
					"ARC FARM ARC %u err cause: %s",
					farm, gaudi2_arc_sei_error_cause[bit]);
			clr_mask |= BIT(bit);
			num_errors++;
		}

		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (farm * ARC_FARM_OFFSET),
				clr_mask);
	}

	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
8685 
/*
 * gaudi2_handle_cpu_sei_err() - handle a CPU-interface SEI interrupt.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 *
 * Dumps the CPU SEI interrupt status, clears the asserted bits and returns
 * the number of causes found.
 */
static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
{
	u32 bit, status, clr_mask = 0, num_errors = 0;

	status = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);

	for (bit = 0 ; bit < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; bit++) {
		if (!(status & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_cpu_sei_error_cause[bit]);
		clr_mask |= BIT(bit);
		num_errors++;
	}

	hl_check_for_glbl_errors(hdev);

	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, clr_mask);

	return num_errors;
}
8707 
/*
 * gaudi2_handle_rot_err() - handle a rotator error event.
 * @hdev: habanalabs device structure.
 * @rot_index: rotator module index.
 * @event_type: async event id reported by FW.
 * @razwi_with_intr_cause: EQ payload holding the interrupt cause bits.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Prints every asserted rotator error cause, then checks for an
 * accompanying RAZWI and for global errors. Returns the number of causes.
 */
static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
					u64 *event_mask)
{
	u64 cause_bits = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; bit++) {
		if (!(cause_bits & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", guadi2_rot_error_cause[bit]);
		num_errors++;
	}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
8729 
/*
 * gaudi2_tpc_ack_interrupts() - acknowledge TPC interrupt causes.
 * @hdev: habanalabs device structure.
 * @tpc_index: TPC module index.
 * @event_type: async event id reported by FW.
 * @razwi_with_intr_cause: EQ payload holding the interrupt cause bits.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Prints every asserted TPC interrupt cause, then checks for an
 * accompanying RAZWI and for global errors. Returns the number of causes.
 */
static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev,  u8 tpc_index, u16 event_type,
					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
					u64 *event_mask)
{
	u64 cause_bits = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; bit++) {
		if (!(cause_bits & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"interrupt cause: %s", gaudi2_tpc_interrupts_cause[bit]);
		num_errors++;
	}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
8751 
/*
 * gaudi2_handle_dec_err() - handle a video-decoder error event.
 * @hdev: habanalabs device structure.
 * @dec_index: global decoder index (dcore decoders first, then PCIE ones).
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Dumps the decoder bridge-control interrupt cause, acknowledges any
 * accompanying RAZWI, checks for global errors and write-1-clears the
 * asserted causes. Returns the number of causes found.
 */
static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
					u64 *event_mask)
{
	u32 cause_addr, status, clr_mask = 0, num_errors = 0;
	int bit;

	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES) {
		/* decoder located in one of the DCOREs */
		cause_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
	} else {
		/* shared (PCIE) decoder */
		cause_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
	}

	status = RREG32(cause_addr);

	for (bit = 0 ; bit < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; bit++) {
		if (!(status & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_dec_error_cause[bit]);
		clr_mask |= BIT(bit);
		num_errors++;
	}

	/* check if RAZWI happened */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
	hl_check_for_glbl_errors(hdev);

	/* Write 1 clear errors */
	WREG32(cause_addr, clr_mask);

	return num_errors;
}
8788 
/*
 * gaudi2_handle_mme_err() - handle an MME control error event.
 * @hdev: habanalabs device structure.
 * @mme_index: MME module (dcore) index.
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Dumps and clears the MME control interrupt cause and scans every MME
 * initiator for an accompanying RAZWI. Returns the number of causes found.
 */
static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 cause_addr, clr_addr, status, clr_mask = 0, num_errors = 0;
	int i;

	cause_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
	clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;

	status = RREG32(cause_addr);

	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
		if (!(status & BIT(i)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", guadi2_mme_error_cause[i]);
		clr_mask |= BIT(i);
		num_errors++;
	}

	/* check if RAZWI happened on any of the MME initiators */
	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);

	hl_check_for_glbl_errors(hdev);

	WREG32(clr_addr, clr_mask);

	return num_errors;
}
8819 
/*
 * gaudi2_handle_mme_sbte_err() - handle an MME SBTE error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW (unused here).
 *
 * This event has a single possible cause and its HW cause-reporting
 * mechanism is known to be unreliable, so reading the cause register would
 * add no information. Only the global error indications are scanned.
 */
static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
{
	hl_check_for_glbl_errors(hdev);

	return GAUDI2_NA_EVENT_CAUSE;
}
8831 
/*
 * gaudi2_handle_mme_wap_err() - handle an MME WAP (accumulator) error event.
 * @hdev: habanalabs device structure.
 * @mme_index: MME module (dcore) index.
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the RAZWI handler.
 *
 * Dumps and clears the MME ACC interrupt cause and checks both WAP
 * initiators for an accompanying RAZWI. Returns the number of causes found.
 */
static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
					u64 *event_mask)
{
	u32 cause_addr, clr_addr, status, clr_mask = 0, num_errors = 0;
	int bit;

	cause_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
	clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;

	status = RREG32(cause_addr);

	for (bit = 0 ; bit < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; bit++) {
		if (!(status & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", guadi2_mme_wap_error_cause[bit]);
		clr_mask |= BIT(bit);
		num_errors++;
	}

	/* check if RAZWI happened on WAP0/1 */
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
	hl_check_for_glbl_errors(hdev);

	WREG32(clr_addr, clr_mask);

	return num_errors;
}
8861 
/*
 * gaudi2_handle_kdma_core_event() - handle a KDMA core error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @intr_cause_data: interrupt cause bits delivered with the event.
 *
 * Returns the number of asserted causes.
 */
static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data)
{
	u32 num_errors = 0;
	int bit;

	/*
	 * An AXI read/write error raises this interrupt. Due to an HW errata,
	 * the KDMA cause register always reports the error as HBW, even when
	 * the failing transaction was actually LBW.
	 */
	for (bit = 0 ; bit < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; bit++) {
		if (!(intr_cause_data & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_kdma_core_interrupts_cause[bit]);
		num_errors++;
	}

	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
8884 
/*
 * gaudi2_handle_dma_core_event() - handle a DMA core error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @intr_cause: interrupt cause bits delivered with the event.
 *
 * Prints every asserted DMA core cause and checks for global errors.
 * Returns the number of causes found.
 */
static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
{
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; bit++) {
		if (!(intr_cause & BIT(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_dma_core_interrupts_cause[bit]);
		num_errors++;
	}

	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
8901 
/*
 * gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info() - report PCIE master-RR RAZWI hits.
 * @hdev: habanalabs device structure.
 * @event_mask: notification mask updated by the RAZWI print helpers.
 *
 * The PCIE shared master interface latches four independent RAZWI
 * indications (HBW/LBW x AW/AR). For each one that is set, print the
 * captured RAZWI information and acknowledge the indication
 * (write-1-to-clear). Table-driven to avoid four copy-pasted stanzas.
 */
static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
{
	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
	static const struct {
		u32 offset;	/* RAZWI-happened register offset from the IF base */
		bool is_hbw;	/* high-bandwidth (vs low-bandwidth) path */
		bool is_write;	/* AW (write) vs AR (read) channel */
	} razwi_regs[] = {
		{ RR_SHRD_HBW_AW_RAZWI_HAPPENED, true, true },
		{ RR_SHRD_HBW_AR_RAZWI_HAPPENED, true, false },
		{ RR_SHRD_LBW_AW_RAZWI_HAPPENED, false, true },
		{ RR_SHRD_LBW_AR_RAZWI_HAPPENED, false, false },
	};
	int i;

	for (i = 0 ; i < ARRAY_SIZE(razwi_regs) ; i++) {
		razwi_happened_addr = mstr_if_base_addr + razwi_regs[i].offset;

		if (!RREG32(razwi_happened_addr))
			continue;

		if (razwi_regs[i].is_hbw)
			gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr,
					razwi_regs[i].is_write, "PCIE",
					GAUDI2_ENGINE_ID_PCIE, event_mask);
		else
			gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr,
					razwi_regs[i].is_write, "PCIE",
					GAUDI2_ENGINE_ID_PCIE, event_mask);

		/* W1C - acknowledge the latched indication */
		WREG32(razwi_happened_addr, 0x1);
	}
}
8934 
/*
 * gaudi2_print_pcie_addr_dec_info() - handle a PCIE address-decode error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @intr_cause_data: interrupt cause bits delivered with the event.
 * @event_mask: notification mask updated by the RAZWI print helper.
 *
 * Prints each asserted cause; LBW AXI errors additionally trigger a global
 * error scan, and bad-access errors a master-RR RAZWI dump. Returns the
 * number of causes found.
 */
static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
					u64 intr_cause_data, u64 *event_mask)
{
	u32 num_errors = 0;
	u64 cause_bit;
	int i;

	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
		cause_bit = intr_cause_data & BIT_ULL(i);
		if (!cause_bit)
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
		num_errors++;

		if (cause_bit == PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK)
			hl_check_for_glbl_errors(hdev);
		else if (cause_bit == PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK)
			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
	}

	return num_errors;
}
8961 
/*
 * gaudi2_handle_pif_fatal() - handle a PMMU PIF fatal error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @intr_cause_data: interrupt cause bits delivered with the event.
 *
 * Prints every asserted PMMU fatal cause. Returns the number of causes.
 */
static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
				u64 intr_cause_data)
{
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; bit++) {
		if (!(intr_cause_data & BIT_ULL(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[bit]);
		num_errors++;
	}

	return num_errors;
}
8979 
/*
 * gaudi2_handle_hif_fatal() - handle an HIF fatal error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @intr_cause_data: interrupt cause bits delivered with the event.
 *
 * Prints every asserted HIF fatal cause. Returns the number of causes.
 */
static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
{
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; bit++) {
		if (!(intr_cause_data & BIT_ULL(bit)))
			continue;

		gaudi2_print_event(hdev, event_type, true,
			"err cause: %s", gaudi2_hif_fatal_interrupts_cause[bit]);
		num_errors++;
	}

	return num_errors;
}
8995 
/*
 * gaudi2_handle_page_error() - report a captured MMU page fault, if any.
 * @hdev: habanalabs device structure.
 * @mmu_base: base address of the MMU block (HMMU or PMMU).
 * @is_pmmu: true for the PMMU, false for an HMMU.
 * @event_mask: notification mask updated by the page-fault handler.
 *
 * Reads the page-error capture registers relative to @mmu_base and, when a
 * valid capture exists, reports the faulting VA and re-arms the capture.
 */
static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
					u64 *event_mask)
{
	u32 valid, val;
	u64 addr;

	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

	/* no page-error capture latched - nothing to report */
	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
		return;

	/* VA is captured in two registers: high bits first, then VA[31:0] */
	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));

	if (is_pmmu) {
		dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
	} else {
		/* HMMU VAs are HW-scrambled; report the covering un-scrambled range */
		addr = gaudi2_mmu_descramble_addr(hdev, addr);
		addr &= HW_UNSCRAMBLED_BITS_MASK;
		dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
				addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
	}

	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);

	/* invalidate the capture so the next fault can be latched */
	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
9025 
/*
 * gaudi2_handle_access_error() - report a captured MMU access error, if any.
 * @hdev: habanalabs device structure.
 * @mmu_base: base address of the MMU block (HMMU or PMMU).
 * @is_pmmu: true for the PMMU, false for an HMMU.
 *
 * Mirrors gaudi2_handle_page_error() but for the access-error capture
 * registers; reports the offending VA and re-arms the capture.
 */
static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
{
	u32 valid, val;
	u64 addr;

	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));

	/* no access-error capture latched - nothing to report */
	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
		return;

	/* VA is captured in two registers: high bits first, then VA[31:0] */
	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
	addr <<= 32;
	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));

	/* HMMU VAs are HW-scrambled; descramble before reporting */
	if (!is_pmmu)
		addr = gaudi2_mmu_descramble_addr(hdev, addr);

	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
				is_pmmu ? "PMMU" : "HMMU", addr);
	/* invalidate the capture so the next error can be latched */
	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
}
9048 
/*
 * gaudi2_handle_mmu_spi_sei_generic() - handle an MMU SPI/SEI cause register.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @mmu_base: base address of the MMU block (HMMU or PMMU).
 * @is_pmmu: true for the PMMU, false for an HMMU.
 * @event_mask: notification mask updated by the page-fault handler.
 *
 * Scans the SPI/SEI cause register: cause bit 0 is a page error and bit 1 an
 * access error (per the gaudi2_mmu_spi_sei table), each handled by its
 * dedicated helper. Clears the cause and interrupt registers afterwards.
 * Returns the number of causes found.
 */
static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
						u64 mmu_base, bool is_pmmu, u64 *event_mask)
{
	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
	int i;

	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);

	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
		if (spi_sei_cause & BIT(i)) {
			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);

			/* cause 0 = page error, cause 1 = access error */
			if (i == 0)
				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
			else if (i == 1)
				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);

			/* a negative clear_bit means no interrupt bit to clear */
			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);

			error_count++;
		}
	}

	/* Clear cause */
	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);

	/* Clear interrupt */
	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);

	return error_count;
}
9082 
/*
 * gaudi2_handle_sm_err() - handle a sync-manager error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @sm_index: sync-manager (dcore) index.
 *
 * Handles both SM interrupt sources: the SEI cause register and the CQ
 * interrupt register. Each is printed and then cleared. Returns the number
 * of errors found.
 */
static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
{
	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
	int i;

	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;

	sei_cause_val = RREG32(sei_cause_addr);
	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
	cq_intr_val = RREG32(cq_intr_addr);

	/* SEI interrupt */
	if (sei_cause_cause) {
		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
					sei_cause_val);

		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
			if (!(sei_cause_cause & BIT(i)))
				continue;

			gaudi2_print_event(hdev, event_type, true,
				"err cause: %s. %s: 0x%X",
				gaudi2_sm_sei_cause[i].cause_name,
				gaudi2_sm_sei_cause[i].log_name,
				sei_cause_log);
			error_count++;
			/* only the first asserted cause is reported */
			break;
		}

		/* Clear SM_SEI_CAUSE */
		WREG32(sei_cause_addr, 0);
	}

	/* CQ interrupt */
	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
		cq_intr_queue_index =
				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
					cq_intr_val);

		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
				sm_index, cq_intr_queue_index);
		error_count++;

		/* Clear CQ_INTR */
		WREG32(cq_intr_addr, 0);
	}

	hl_check_for_glbl_errors(hdev);

	return error_count;
}
9137 
/*
 * get_hmmu_base() - translate an HMMU event id to the HMMU block base address.
 * @event_type: HMMU AXI error-response or SPI/SEI-range event id.
 *
 * The logical HMMU numbering does not map linearly to the physical
 * (dcore, index-in-dcore) placement, hence the explicit per-HMMU mapping
 * below.
 *
 * Return: the MMU block base address, or ULONG_MAX when @event_type is not
 * an HMMU event (callers compare against ULONG_MAX).
 */
static u64 get_hmmu_base(u16 event_type)
{
	u8 dcore, index_in_dcore;

	switch (event_type) {
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
		dcore = 0;
		index_in_dcore = 0;
	break;
	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
		dcore = 1;
		index_in_dcore = 0;
	break;
	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
		dcore = 0;
		index_in_dcore = 1;
	break;
	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
		dcore = 1;
		index_in_dcore = 1;
	break;
	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
		dcore = 3;
		index_in_dcore = 2;
	break;
	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
		dcore = 2;
		index_in_dcore = 2;
	break;
	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
		dcore = 3;
		index_in_dcore = 3;
	break;
	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
		dcore = 2;
		index_in_dcore = 3;
	break;
	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
		dcore = 0;
		index_in_dcore = 2;
	break;
	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
		dcore = 1;
		index_in_dcore = 2;
	break;
	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
		dcore = 0;
		index_in_dcore = 3;
	break;
	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
		dcore = 1;
		index_in_dcore = 3;
	break;
	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
		dcore = 3;
		index_in_dcore = 0;
	break;
	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
		dcore = 2;
		index_in_dcore = 0;
	break;
	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
		dcore = 3;
		index_in_dcore = 1;
	break;
	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
		dcore = 2;
		index_in_dcore = 1;
	break;
	default:
		return ULONG_MAX;
	}

	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
}
9229 
/*
 * gaudi2_handle_mmu_spi_sei_err() - dispatch an MMU SPI/SEI error event.
 * @hdev: habanalabs device structure.
 * @event_type: async event id reported by FW.
 * @event_mask: notification mask updated by the page-fault handler.
 *
 * Resolves the MMU block (an HMMU, or the PMMU) from the event id and
 * delegates to the generic SPI/SEI handler. Returns the number of causes
 * found, or 0 when the event does not map to a known MMU.
 */
static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	bool is_pmmu = false;
	u32 error_count = 0;
	u64 mmu_base;

	switch (event_type) {
	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
		mmu_base = get_hmmu_base(event_type);
		break;

	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
		is_pmmu = true;
		mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return 0;
	}

	/* get_hmmu_base() returns ULONG_MAX for an unrecognized HMMU event */
	if (mmu_base == ULONG_MAX)
		return 0;

	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
							is_pmmu, event_mask);
	hl_check_for_glbl_errors(hdev);

	return error_count;
}
9260 
9261 
9262 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
/* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
{
	bool require_hard_reset = false;
	u32 addr, beat, beat_shift;

	/* summary counters packed into err_cnt by FW */
	dev_err_ratelimited(hdev->dev,
			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));

	/* decode the captured HBM address of the failing read */
	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
	dev_err_ratelimited(hdev->dev,
			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));

	/* For each beat (RDQS edge), look for possible errors and print relevant info */
	for (beat = 0 ; beat < 4 ; beat++) {
		/* single-bit ECC error - correctable, no reset needed */
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
						beat,
						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));

		/* double-bit ECC error - uncorrectable, requires hard reset */
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
						beat,
						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
			require_hard_reset = true;
		}

		/* read parity error - per-beat fields are wider, hence the shift */
		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
			dev_err_ratelimited(hdev->dev,
					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
					beat,
					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
			require_hard_reset = true;
		}

		/* raw DQ data of this beat (two 32-bit words) */
		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
	}

	return require_hard_reset;
}
9324 
/*
 * gaudi2_hbm_sei_print_wr_par_info() - print HBM write-parity error details.
 * @hdev: habanalabs device structure.
 * @wr_par_err_data: EQ payload with the write-parity debug capture.
 * @err_cnt: number of write-parity errors counted by FW.
 */
static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
{
	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;

	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);

	/*
	 * NOTE(review): the CK-1 value is printed as (derr & 0xc) without a
	 * right shift, so it appears pre-shifted (e.g. 0x04/0x08/0x0c) rather
	 * than as a 2-bit field like CK-0 - confirm this is intentional.
	 */
	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
				derr & 0x3, derr & 0xc);

	/* JIRA H6-3286 - the following prints may not be valid */
	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
		dev_err_ratelimited(hdev->dev,
				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
				i,
				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
	}
}
9349 
/* Dump the command/address bus history latched for an HBM CA parity error. */
static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
{
	__le16 *row_cmds = ca_par_err_data->dbg_row;
	__le32 *col_cmds = ca_par_err_data->dbg_col;
	u32 idx;

	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);

	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
	for (idx = 0 ; idx < HBM_CA_ERR_CMD_LIFO_LEN ; idx++) {
		/* ROW is 14 bits wide, COL is 18 bits wide - mask off stale upper bits */
		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", idx,
			le16_to_cpu(row_cmds[idx]) & (u16)GENMASK(13, 0),
			le32_to_cpu(col_cmds[idx]) & (u32)GENMASK(17, 0));
	}
}
9365 
9366 /* Returns true if hard reset is needed or false otherwise */
gaudi2_handle_hbm_mc_sei_err(struct hl_device * hdev,u16 event_type,struct hl_eq_hbm_sei_data * sei_data)9367 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9368 					struct hl_eq_hbm_sei_data *sei_data)
9369 {
9370 	bool require_hard_reset = false;
9371 	u32 hbm_id, mc_id, cause_idx;
9372 
9373 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9374 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9375 
9376 	cause_idx = sei_data->hdr.sei_cause;
9377 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9378 		gaudi2_print_event(hdev, event_type, true,
9379 			"err cause: %s",
9380 			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
9381 		return true;
9382 	}
9383 
9384 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9385 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9386 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9387 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9388 		hbm_mc_sei_cause[cause_idx]);
9389 
9390 	/* Print error-specific info */
9391 	switch (cause_idx) {
9392 	case HBM_SEI_CATTRIP:
9393 		require_hard_reset = true;
9394 		break;
9395 
9396 	case  HBM_SEI_CMD_PARITY_EVEN:
9397 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9398 						le32_to_cpu(sei_data->hdr.cnt));
9399 		require_hard_reset = true;
9400 		break;
9401 
9402 	case  HBM_SEI_CMD_PARITY_ODD:
9403 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9404 						le32_to_cpu(sei_data->hdr.cnt));
9405 		require_hard_reset = true;
9406 		break;
9407 
9408 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9409 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9410 						le32_to_cpu(sei_data->hdr.cnt));
9411 		require_hard_reset = true;
9412 		break;
9413 
9414 	case HBM_SEI_READ_ERR:
9415 		/* Unlike other SEI events, read error requires further processing of the
9416 		 * raw data in order to determine the root cause.
9417 		 */
9418 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9419 								&sei_data->read_err_info,
9420 								le32_to_cpu(sei_data->hdr.cnt));
9421 		break;
9422 
9423 	default:
9424 		break;
9425 	}
9426 
9427 	require_hard_reset |= !!sei_data->hdr.is_critical;
9428 
9429 	return require_hard_reset;
9430 }
9431 
/* Report an HBM catastrophic-temperature trip; returns the number of errors logged. */
static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
				u64 intr_cause_data)
{
	if (!intr_cause_data)
		return 0;

	gaudi2_print_event(hdev, event_type, true,
		"temperature error cause: %#llx", intr_cause_data);

	return 1;
}
9443 
/* Decode HBM MC SPI notification bits; returns how many causes were set. */
static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
{
	u32 idx, num_errors = 0;

	for (idx = 0 ; idx < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; idx++) {
		if (!(intr_cause_data & hbm_mc_spi[idx].mask))
			continue;

		dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
			hbm_mc_spi[idx].cause);
		num_errors++;
	}

	return num_errors;
}
9457 
/*
 * Track FW-reported clock throttling state transitions (power/thermal,
 * start/end) and report thermal events to user space via @event_mask.
 */
static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t ts_zero = ktime_set(0, 0);

	/* Throttling bookkeeping is shared state - serialize against readers */
	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
		/* Power throttling window opens: latch start time, clear end */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ts_zero;
		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
		/* Power throttling window closes */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
		/* Thermal throttling window opens - also notify user space */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ts_zero;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
		break;

	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
		/* Thermal throttling window closes - also notify user space */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
9502 
/* Log FW vs. driver CPU-queue pointers when FW reports them out of sync. */
static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	gaudi2_print_event(hdev, event_type, false,
		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
		cpu_pq->pi, atomic_read(&cpu_pq->ci));
}
9513 
/*
 * Handle PCIe P2P / MSI-X gateway security interrupts: log the blocked
 * transaction details and acknowledge the interrupt by writing 1 back.
 * Returns the number of interrupt sources that were pending.
 */
static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
{
	u32 num_errors = 0;
	u32 p2p_sts, msix_gw_sts;

	p2p_sts = RREG32(mmPCIE_WRAP_P2P_INTR);
	msix_gw_sts = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);

	if (p2p_sts) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie p2p transaction terminated due to security, req_id(0x%x)",
			RREG32(mmPCIE_WRAP_P2P_REQ_ID));

		/* W1C - acknowledge the P2P interrupt */
		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
		num_errors++;
	}

	if (msix_gw_sts) {
		gaudi2_print_event(hdev, event_type, true,
			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));

		/* W1C - acknowledge the MSI-X gateway interrupt */
		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
		num_errors++;
	}

	return num_errors;
}
9541 
gaudi2_handle_pcie_drain(struct hl_device * hdev,struct hl_eq_pcie_drain_ind_data * drain_data)9542 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9543 			struct hl_eq_pcie_drain_ind_data *drain_data)
9544 {
9545 	u64 cause, error_count = 0;
9546 
9547 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9548 
9549 	if (cause & BIT_ULL(0)) {
9550 		dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
9551 		error_count++;
9552 	}
9553 
9554 	if (cause & BIT_ULL(1)) {
9555 		dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
9556 		error_count++;
9557 	}
9558 
9559 	return error_count;
9560 }
9561 
/*
 * Handle a PSOC AXI drain event: log each drain cause bit that is set,
 * then scan for global errors. Returns the number of causes logged.
 */
static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
{
	u32 num_errors = 0;
	int bit;

	for (bit = 0 ; bit < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; bit++) {
		if (!(intr_cause_data & BIT_ULL(bit)))
			continue;

		dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
			gaudi2_psoc_axi_drain_interrupts_cause[bit]);
		num_errors++;
	}

	hl_check_for_glbl_errors(hdev);

	return num_errors;
}
9579 
/* Log FW/driver CPU-queue pointers after FW flags a packet sanity failure. */
static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];

	gaudi2_print_event(hdev, event_type, false,
		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
		cpu_pq->pi, atomic_read(&cpu_pq->ci));
}
9589 
/*
 * Handle an engine-ARC interrupt event. Returns 1 when a recognized error
 * was reported, 0 for an unknown interrupt type.
 */
static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
					struct hl_eq_engine_arc_intr_data *data)
{
	struct hl_engine_arc_dccm_queue_full_irq *full_irq;
	u32 irq_type, eng_id;
	u64 irq_payload;

	irq_type = le32_to_cpu(data->intr_type);
	eng_id = le32_to_cpu(data->engine_id);
	irq_payload = le64_to_cpu(data->payload);

	if (irq_type != ENGINE_ARC_DCCM_QUEUE_FULL_IRQ) {
		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
		return 0;
	}

	/* For DCCM-queue-full, the payload itself is the IRQ descriptor */
	full_irq = (struct hl_engine_arc_dccm_queue_full_irq *) &irq_payload;

	gaudi2_print_event(hdev, event_type, true,
			"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
			eng_id, irq_type, full_irq->queue_index);

	return 1;
}
9614 
/*
 * event_id_to_engine_id() - translate an event-queue event type to the
 * engine that raised it.
 * @hdev: habanalabs device structure.
 * @event_type: event ID taken from the EQ entry header.
 *
 * Two-stage mapping: ranged events are first reduced to a (block type,
 * index) pair, and a second switch converts that pair to a
 * GAUDI2_*_ENGINE_ID value. Events tied to a single fixed engine return
 * the engine ID directly from the first switch.
 *
 * Return: the engine ID, or U16_MAX when the event maps to no engine.
 */
static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
{
	enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
	u16 index;

	/* Stage 1: reduce ranged events to (type, index) or return directly */
	switch (event_type) {
	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
		index = event_type - GAUDI2_EVENT_TPC0_QM;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME0_QM:
		index = 0;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME1_QM:
		index = 1;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME2_QM:
		index = 2;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
	case GAUDI2_EVENT_MME3_QM:
		index = 3;
		type = GAUDI2_BLOCK_TYPE_MME;
		break;
	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_KDMA_BM_SPMU:
	case GAUDI2_EVENT_KDMA0_CORE:
		return GAUDI2_ENGINE_ID_KDMA;
	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA0_CORE:
	case GAUDI2_EVENT_PDMA0_BM_SPMU:
	case GAUDI2_EVENT_PDMA0_QM:
		return GAUDI2_ENGINE_ID_PDMA_0;
	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
	case GAUDI2_EVENT_PDMA1_CORE:
	case GAUDI2_EVENT_PDMA1_BM_SPMU:
	case GAUDI2_EVENT_PDMA1_QM:
		return GAUDI2_ENGINE_ID_PDMA_1;
	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
		type = GAUDI2_BLOCK_TYPE_DEC;
		break;
	case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
		/* SPI and BMON events appear to alternate per decoder, hence the
		 * divide by two - TODO confirm against the event ID map
		 */
		index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
		type = GAUDI2_BLOCK_TYPE_DEC;
		break;
	case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
		/* NIC engine IDs come in pairs per NIC macro (index * 2) */
		index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
		/* QM events are per engine, so the offset maps one-to-one */
		index = event_type - GAUDI2_EVENT_NIC0_QM0;
		return GAUDI2_ENGINE_ID_NIC0_0 + index;
	case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
		index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
		return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
	case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
		/* BMON_SPMU and KERNEL_ERR events appear to alternate per TPC,
		 * hence the divide by two - TODO confirm against the event ID map
		 */
		index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
		type = GAUDI2_BLOCK_TYPE_TPC;
		break;
	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
		return GAUDI2_ENGINE_ID_ROT_0;
	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
	case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
		return GAUDI2_ENGINE_ID_ROT_1;
	/* HDMA (EDMA) engines: two per DCORE, mapped explicitly below */
	case GAUDI2_EVENT_HDMA0_BM_SPMU:
	case GAUDI2_EVENT_HDMA0_QM:
	case GAUDI2_EVENT_HDMA0_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA1_BM_SPMU:
	case GAUDI2_EVENT_HDMA1_QM:
	case GAUDI2_EVENT_HDMA1_CORE:
		return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA2_BM_SPMU:
	case GAUDI2_EVENT_HDMA2_QM:
	case GAUDI2_EVENT_HDMA2_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA3_BM_SPMU:
	case GAUDI2_EVENT_HDMA3_QM:
	case GAUDI2_EVENT_HDMA3_CORE:
		return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA4_BM_SPMU:
	case GAUDI2_EVENT_HDMA4_QM:
	case GAUDI2_EVENT_HDMA4_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA5_BM_SPMU:
	case GAUDI2_EVENT_HDMA5_QM:
	case GAUDI2_EVENT_HDMA5_CORE:
		return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
	case GAUDI2_EVENT_HDMA6_BM_SPMU:
	case GAUDI2_EVENT_HDMA6_QM:
	case GAUDI2_EVENT_HDMA6_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
	case GAUDI2_EVENT_HDMA7_BM_SPMU:
	case GAUDI2_EVENT_HDMA7_QM:
	case GAUDI2_EVENT_HDMA7_CORE:
		return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
	default:
		break;
	}

	/* Stage 2: convert (type, index) to a per-DCORE engine ID */
	switch (type) {
	case GAUDI2_BLOCK_TYPE_TPC:
		switch (index) {
		case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
			return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
		case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
			return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
		case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
			return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
		case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
			return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
		default:
			break;
		}
		break;
	case GAUDI2_BLOCK_TYPE_MME:
		switch (index) {
		case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
		case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
		case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
		case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
		default:
			break;
		}
		break;
	case GAUDI2_BLOCK_TYPE_DEC:
		switch (index) {
		case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
		case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
		case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
		case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
		case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
		default:
			break;
		}
		break;
	default:
		break;
	}

	/* No engine is associated with this event */
	return U16_MAX;
}
9779 
gaudi2_handle_eqe(struct hl_device * hdev,struct hl_eq_entry * eq_entry)9780 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9781 {
9782 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9783 	bool reset_required = false, is_critical = false;
9784 	u32 index, ctl, reset_flags = 0, error_count = 0;
9785 	u64 event_mask = 0;
9786 	u16 event_type;
9787 
9788 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9789 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9790 
9791 	if (event_type >= GAUDI2_EVENT_SIZE) {
9792 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9793 				event_type, GAUDI2_EVENT_SIZE - 1);
9794 		return;
9795 	}
9796 
9797 	gaudi2->events_stat[event_type]++;
9798 	gaudi2->events_stat_aggregate[event_type]++;
9799 
9800 	switch (event_type) {
9801 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9802 		fallthrough;
9803 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9804 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9805 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9806 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9807 		is_critical = eq_entry->ecc_data.is_critical;
9808 		error_count++;
9809 		break;
9810 
9811 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9812 		fallthrough;
9813 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9814 		fallthrough;
9815 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9816 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9817 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9818 		break;
9819 
9820 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9821 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
9822 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9823 		break;
9824 
9825 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9826 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9827 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9828 		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9829 		break;
9830 
9831 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9832 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9833 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9834 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9835 		break;
9836 
9837 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9838 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9839 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9840 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9841 					&eq_entry->razwi_with_intr_cause, &event_mask);
9842 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9843 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9844 		break;
9845 
9846 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9847 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9848 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9849 						&eq_entry->razwi_with_intr_cause, &event_mask);
9850 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9851 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9852 		break;
9853 
9854 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9855 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9856 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9857 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9858 		break;
9859 
9860 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9861 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9862 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9863 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9864 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9865 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9866 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9867 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9868 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9869 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9870 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9871 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9872 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9873 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9874 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9875 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9876 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9877 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9878 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9879 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9880 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9881 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9882 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9883 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9884 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9885 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9886 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9887 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9888 					&eq_entry->razwi_with_intr_cause, &event_mask);
9889 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9890 		break;
9891 
9892 	case GAUDI2_EVENT_DEC0_SPI:
9893 	case GAUDI2_EVENT_DEC1_SPI:
9894 	case GAUDI2_EVENT_DEC2_SPI:
9895 	case GAUDI2_EVENT_DEC3_SPI:
9896 	case GAUDI2_EVENT_DEC4_SPI:
9897 	case GAUDI2_EVENT_DEC5_SPI:
9898 	case GAUDI2_EVENT_DEC6_SPI:
9899 	case GAUDI2_EVENT_DEC7_SPI:
9900 	case GAUDI2_EVENT_DEC8_SPI:
9901 	case GAUDI2_EVENT_DEC9_SPI:
9902 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9903 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9904 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9905 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9906 		break;
9907 
9908 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9909 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9910 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9911 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9912 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9913 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9914 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9915 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9916 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9917 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9918 		break;
9919 
9920 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9921 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9922 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9923 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9924 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9925 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9926 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9927 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9928 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9929 		break;
9930 
9931 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9932 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9933 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9934 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9935 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9936 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9937 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9938 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9939 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9940 		break;
9941 
9942 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9943 	case GAUDI2_EVENT_KDMA0_CORE:
9944 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9945 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9946 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9947 		break;
9948 
9949 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9950 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9951 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9952 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9953 		break;
9954 
9955 	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9956 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9957 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9958 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9959 		break;
9960 
9961 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9962 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9963 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9964 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9965 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9966 		break;
9967 
9968 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9969 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9970 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9971 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9972 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9973 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9974 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9975 		break;
9976 
9977 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9978 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9979 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9980 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9981 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9982 		break;
9983 
9984 	case GAUDI2_EVENT_PMMU_FATAL_0:
9985 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9986 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9987 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9988 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9989 		break;
9990 
9991 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9992 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9993 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9994 		break;
9995 
9996 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9997 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9998 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9999 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10000 			reset_required = true;
10001 			is_critical = eq_entry->sei_data.hdr.is_critical;
10002 		}
10003 		error_count++;
10004 		break;
10005 
10006 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
10007 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
10008 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10009 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10010 		break;
10011 
10012 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
10013 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
10014 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10015 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10016 		break;
10017 
10018 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
10019 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
10020 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10021 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10022 		if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0)
10023 			is_critical = true;
10024 		break;
10025 
10026 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
10027 		error_count = gaudi2_handle_psoc_drain(hdev,
10028 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10029 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10030 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10031 		break;
10032 
10033 	case GAUDI2_EVENT_CPU_AXI_ECC:
10034 		error_count = GAUDI2_NA_EVENT_CAUSE;
10035 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10036 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10037 		break;
10038 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
10039 		error_count = GAUDI2_NA_EVENT_CAUSE;
10040 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10041 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10042 		break;
10043 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
10044 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
10045 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
10046 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
10047 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
10048 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10049 		break;
10050 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
10051 		error_count = GAUDI2_NA_EVENT_CAUSE;
10052 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10053 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10054 		break;
10055 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
10056 		error_count = GAUDI2_NA_EVENT_CAUSE;
10057 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10058 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10059 		break;
10060 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
10061 		error_count = GAUDI2_NA_EVENT_CAUSE;
10062 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10063 		break;
10064 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
10065 		error_count = GAUDI2_NA_EVENT_CAUSE;
10066 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10067 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10068 		break;
10069 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
10070 		error_count = GAUDI2_NA_EVENT_CAUSE;
10071 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10072 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10073 		break;
10074 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
10075 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
10076 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
10077 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
10078 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
10079 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
10080 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
10081 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
10082 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
10083 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
10084 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
10085 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
10086 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
10087 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
10088 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
10089 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
10090 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
10091 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
10092 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
10093 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
10094 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
10095 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
10096 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
10097 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
10098 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
10099 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
10100 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
10101 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
10102 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
10103 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
10104 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
10105 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
10106 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
10107 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
10108 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
10109 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
10110 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
10111 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
10112 		fallthrough;
10113 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
10114 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
10115 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
10116 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
10117 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
10118 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
10119 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
10120 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
10121 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
10122 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
10123 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
10124 		error_count = GAUDI2_NA_EVENT_CAUSE;
10125 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10126 		break;
10127 
10128 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
10129 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
10130 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
10131 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
10132 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
10133 		error_count = GAUDI2_NA_EVENT_CAUSE;
10134 		break;
10135 
10136 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
10137 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
10138 		error_count = GAUDI2_NA_EVENT_CAUSE;
10139 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10140 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10141 		break;
10142 
10143 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
10144 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10145 		error_count = GAUDI2_NA_EVENT_CAUSE;
10146 		/* Do nothing- FW will handle it */
10147 		break;
10148 
10149 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
10150 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
10151 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10152 		break;
10153 
10154 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
10155 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
10156 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
10157 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10158 		break;
10159 
10160 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
10161 		error_count = GAUDI2_NA_EVENT_CAUSE;
10162 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10163 		break;
10164 
10165 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
10166 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
10167 						le64_to_cpu(eq_entry->data[0]));
10168 		error_count = GAUDI2_NA_EVENT_CAUSE;
10169 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10170 		break;
10171 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
10172 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
10173 						le64_to_cpu(eq_entry->data[0]));
10174 		error_count = GAUDI2_NA_EVENT_CAUSE;
10175 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10176 		break;
10177 
10178 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
10179 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
10180 		error_count = GAUDI2_NA_EVENT_CAUSE;
10181 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10182 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10183 		break;
10184 
10185 	case GAUDI2_EVENT_ARC_DCCM_FULL:
10186 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
10187 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10188 		break;
10189 
10190 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
10191 	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
10192 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10193 		error_count = GAUDI2_NA_EVENT_CAUSE;
10194 		is_critical = true;
10195 		break;
10196 
10197 	case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
10198 	case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
10199 	case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
10200 	case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
10201 	case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
10202 	case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
10203 		error_count = GAUDI2_NA_EVENT_CAUSE;
10204 		dev_info_ratelimited(hdev->dev, "%s event received\n",
10205 					gaudi2_irq_map_table[event_type].name);
10206 		break;
10207 
10208 	case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
10209 		hl_eq_heartbeat_event_handle(hdev);
10210 		error_count = GAUDI2_NA_EVENT_CAUSE;
10211 		break;
10212 	default:
10213 		if (gaudi2_irq_map_table[event_type].valid) {
10214 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
10215 						event_type);
10216 			error_count = GAUDI2_NA_EVENT_CAUSE;
10217 		}
10218 	}
10219 
10220 	if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
10221 		hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
10222 
10223 	/* Make sure to dump an error in case no error cause was printed so far.
10224 	 * Note that although we have counted the errors, we use this number as
10225 	 * a boolean.
10226 	 */
10227 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
10228 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
10229 	else if (error_count == 0)
10230 		gaudi2_print_event(hdev, event_type, true,
10231 				"No error cause for H/W event %u", event_type);
10232 
10233 	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
10234 		if (reset_required ||
10235 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10236 			reset_flags |= HL_DRV_RESET_HARD;
10237 
10238 		if (hdev->hard_reset_on_fw_events ||
10239 				(hdev->asic_prop.fw_security_enabled && is_critical))
10240 			goto reset_device;
10241 	}
10242 
10243 	/* Send unmask irq only for interrupts not classified as MSG */
10244 	if (!gaudi2_irq_map_table[event_type].msg)
10245 		hl_fw_unmask_irq(hdev, event_type);
10246 
10247 	if (event_mask)
10248 		hl_notifier_event_send_all(hdev, event_mask);
10249 
10250 	return;
10251 
10252 reset_device:
10253 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
10254 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10255 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10256 	} else {
10257 		reset_flags |= HL_DRV_RESET_DELAY;
10258 	}
10259 	/* escalate general hw errors to critical/fatal error */
10260 	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10261 		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10262 
10263 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10264 	hl_device_cond_reset(hdev, reset_flags, event_mask);
10265 }
10266 
/*
 * gaudi2_memset_memory_chunk_using_edma_qm() - scrub one chunk of device
 * memory by submitting a LIN_DMA memset packet through an EDMA QMAN.
 * @hdev: habanalabs device structure.
 * @lin_dma_pkt: host-side staging area for the packet being built.
 * @phys_addr: DRAM address where the packet (the CB) is stored.
 * @hw_queue_id: EDMA H/W queue to submit the CB on.
 * @size: number of bytes to set.
 * @addr: device address at which the memset starts.
 * @val: fill value.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
			struct packet_lin_dma *lin_dma_pkt,
			u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
	u32 pkt_size = sizeof(struct packet_lin_dma), ctl;
	int i, rc;

	/* LIN_DMA in memset mode, with write-completion and engine barrier */
	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA) |
		FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1) |
		FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1) |
		FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	/* Stage the packet in HBM, one 64-bit word at a time */
	for (i = 0 ; i < 3 ; i++) {
		rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
				phys_addr + (i * sizeof(u64)),
				((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
		if (rc) {
			dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
				phys_addr);
			return rc;
		}
	}

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
	if (rc)
		dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
				hw_queue_id);

	return rc;
}
10304 
/*
 * gaudi2_memset_device_memory() - fill a device memory range with a value,
 * using the EDMA engines.
 * @hdev: habanalabs device structure.
 * @addr: device address to start at.
 * @size: number of bytes to set.
 * @val: fill value.
 *
 * LIN_DMA memset packets are staged as a CB at the start of the DRAM user
 * section and submitted to the enabled EDMA QMANs in 2GB chunks. Completion
 * is tracked by a sync object that each EDMA increments on write-completion.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
{
	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
		old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc = 0, dma_num = 0, i;
	void *lin_dma_pkts_arr;

	if (prop->edma_enabled_mask == 0) {
		dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
		return -EIO;
	}

	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
	comp_addr = CFG_BASE + sob_addr;
	/* Each EDMA write-completion increments the SOB by 1 */
	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);

	/* Calculate how many lin dma pkts we'll need */
	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
	pkt_size = sizeof(struct packet_lin_dma);
	cb_len = pkt_size * num_of_pkts;

	/*
	 * if we're not scrubbing HMMU or NIC reserved sections in hbm,
	 * then it is the scrubbing of the user section, as we use the start of
	 * the user section to store the CB of the EDMA QM, so shift the start
	 * address of the scrubbing accordingly and scrub the CB section before
	 * leaving this function.
	 */
	if ((addr >= prop->dram_user_base_address) &&
				(addr < prop->dram_user_base_address + cb_len))
		cur_addr += (prop->dram_user_base_address + cb_len) - addr;

	/* kvcalloc() may fall back to vmalloc - must be released with kvfree() */
	lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
	if (!lin_dma_pkts_arr)
		return -ENOMEM;

	/*
	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
	 * only the first one to restore later
	 * also set the sob addr for all edma cores for completion.
	 * set QM as trusted to allow it to access physical address with MMU bp.
	 */
	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
					edma_offset, mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
					lower_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
					upper_32_bits(comp_addr));
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
					comp_val);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, true);
		}
	}

	WREG32(sob_addr, 0);

	/* Round-robin the 2GB chunks over all enabled EDMA queues */
	while (cur_addr < end_addr) {
		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
					continue;

				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);

				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
					prop->dram_user_base_address + (dma_num * pkt_size),
					edma_queues_id[dcore] + edma_idx * 4,
					chunk_size, cur_addr, val);
				if (rc)
					goto end;

				dma_num++;
				cur_addr += chunk_size;
				if (cur_addr == end_addr)
					goto edma_wait;
			}
		}
	}

edma_wait:
	/* Wait until the SOB counted one completion per submitted packet */
	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
	if (rc) {
		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n",
						busy, dma_num);
		goto end;
	}
end:
	/* Restore MMU bypass and completion configuration of all EDMA cores */
	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;

			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
				continue;

			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
			gaudi2_qman_set_test_mode(hdev,
					edma_queues_id[dcore] + 4 * edma_idx, false);
		}
	}

	/* Use the first (now zeroed) word of the array as the zero source */
	memset(lin_dma_pkts_arr, 0, sizeof(u64));

	/* Zero the HBM area where we copied the CB.
	 * NOTE(review): the loop bound is cb_len / sizeof(u64) while the step
	 * is sizeof(u64) bytes, so only cb_len / 8 bytes get zeroed - confirm
	 * the intended coverage of the CB area.
	 */
	for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64))
		rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
			prop->dram_user_base_address + i,
				(u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
	WREG32(sob_addr, 0);

	/* Fix: buffer was allocated with kvcalloc(), so kvfree() is mandatory
	 * (kfree() is invalid for a vmalloc-backed allocation).
	 */
	kvfree(lin_dma_pkts_arr);

	return rc;
}
10444 
/* Scrub the entire DRAM user section with the given fill value. */
static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 region_size = prop->dram_end_address - prop->dram_user_base_address;
	int rc;

	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address,
						region_size, val);
	if (rc)
		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
				prop->dram_user_base_address, region_size);

	return rc;
}
10458 
gaudi2_scrub_device_mem(struct hl_device * hdev)10459 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10460 {
10461 	int rc;
10462 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10463 	u64 val = hdev->memory_scrub_val;
10464 	u64 addr, size;
10465 
10466 	if (!hdev->memory_scrub)
10467 		return 0;
10468 
10469 	/* scrub SRAM */
10470 	addr = prop->sram_user_base_address;
10471 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10472 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10473 			addr, addr + size, val);
10474 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10475 	if (rc) {
10476 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10477 		return rc;
10478 	}
10479 
10480 	/* scrub DRAM */
10481 	rc = gaudi2_scrub_device_dram(hdev, val);
10482 	if (rc) {
10483 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10484 		return rc;
10485 	}
10486 	return 0;
10487 }
10488 
/*
 * gaudi2_restore_user_sm_registers() - reset the user-available sync manager
 * resources (CQs, monitors and SOBs) of all dcores to their default values.
 * @hdev: habanalabs device structure.
 *
 * For dcore0 only the range starting at the first user-available resource is
 * cleared (the lower part is reserved for the driver); for dcores 1..3 the
 * whole range is cleared.
 */
static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
{
	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
	u32 val, size, offset;
	int dcore_id;

	/* Dcore0 CQ registers, starting from the first user-available CQ.
	 * Note the size is reduced by the same offset, so only the user part
	 * of each register array is cleared.
	 */
	offset = hdev->asic_prop.first_available_cq[0] * 4;
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);

	/* memset dcore0 CQ registers */
	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

	/* Dcores 1..3: clear the full CQ register ranges */
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

		cq_lbw_l_addr += DCORE_OFFSET;
		cq_lbw_h_addr += DCORE_OFFSET;
		cq_lbw_data_addr += DCORE_OFFSET;
		cq_base_l_addr += DCORE_OFFSET;
		cq_base_h_addr += DCORE_OFFSET;
		cq_size_addr += DCORE_OFFSET;
	}

	/* Monitors: status registers are set to the protection value,
	 * config registers are cleared.
	 */
	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	/* Sync objects: cleared to zero */
	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}
10580 
gaudi2_restore_user_qm_registers(struct hl_device * hdev)10581 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10582 {
10583 	u32 reg_base, hw_queue_id;
10584 
10585 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10586 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10587 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10588 			continue;
10589 
10590 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10591 
10592 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10593 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10594 	}
10595 
10596 	/* Flush all WREG to prevent race */
10597 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10598 }
10599 
gaudi2_restore_nic_qm_registers(struct hl_device * hdev)10600 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10601 {
10602 	u32 reg_base, hw_queue_id;
10603 
10604 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10605 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10606 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10607 			continue;
10608 
10609 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10610 
10611 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10612 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10613 	}
10614 
10615 	/* Flush all WREG to prevent race */
10616 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10617 }
10618 
/* Context switch is not required on Gaudi2 - intentionally a no-op. */
static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
10623 
/* No phase topology to restore on Gaudi2 - intentionally empty. */
static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}
10627 
/*
 * Run the configuration callback of @cfg_ctx on every enabled instance of
 * one block, addressing each instance relative to the block's base.
 */
static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
						struct dup_block_ctx *cfg_ctx)
{
	u64 base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
	int inst;

	for (inst = 0 ; inst < cfg_ctx->instances ; inst++) {
		u8 seq = block_idx * cfg_ctx->instances + inst;

		/* skip disabled instance */
		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
			continue;

		cfg_ctx->instance_cfg_fn(hdev,
				base + inst * cfg_ctx->instance_off, cfg_ctx->data);
	}
}
10646 
/* Configure every block instance whose bit is set in @mask. */
static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
						u64 mask)
{
	int blk;

	cfg_ctx->enabled_mask = mask;

	for (blk = 0 ; blk < cfg_ctx->blocks ; blk++)
		gaudi2_init_block_instances(hdev, blk, cfg_ctx);
}
10657 
/* Configure all instances of all blocks (no instance masked out). */
void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}
10662 
/*
 * gaudi2_debugfs_read_dma() - read device memory into a host blob via KDMA.
 * @hdev: habanalabs device structure.
 * @addr: device address to read from.
 * @size: number of bytes to read.
 * @blob_addr: host buffer to copy the data into (caller-allocated).
 *
 * A 2MB host bounce buffer is allocated, mapped into the compute context's
 * device VA space, and filled in 2MB steps by KDMA jobs; each step is then
 * memcpy'd into @blob_addr.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
{
	void *host_mem_virtual_addr;
	dma_addr_t host_mem_dma_addr;
	u64 reserved_va_base;
	u32 pos, size_left, size_to_dma;
	struct hl_ctx *ctx;
	int rc = 0;

	/* Fetch the ctx */
	ctx = hl_get_compute_ctx(hdev);
	if (!ctx) {
		dev_err(hdev->dev, "No ctx available\n");
		return -EINVAL;
	}

	/* Allocate buffers for read and for poll */
	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
	if (host_mem_virtual_addr == NULL) {
		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
		rc = -ENOMEM;
		goto put_ctx;
	}

	/* Reserve VM region on asic side */
	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
	if (!reserved_va_base) {
		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
		rc = -ENOMEM;
		goto free_data_buffer;
	}

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	rc = hl_mmu_invalidate_cache_range(hdev, false,
				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				      ctx->asid, reserved_va_base, SZ_2M);
	if (rc) {
		/* Cache invalidation failed - undo the mapping before bailing */
		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
		goto unreserve_va;
	}

	mutex_unlock(&hdev->mmu_lock);

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	/* Copy in 2MB steps; the final step may be shorter */
	while (size_left > 0) {
		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
		if (rc)
			break;

		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* Restore KDMA to kernel ASID with MMU bypass */
	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	if (rc)
		goto unreserve_va;

	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				      ctx->asid, reserved_va_base, SZ_2M);

unreserve_va:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
}
10762 
/*
 * gaudi2_internal_cb_pool_init() - set up the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context the pool is created for.
 *
 * Allocates a coherent host buffer, manages it with a gen_pool whose minimum
 * order fits the smaller of the signal/wait CB sizes, and maps it into the
 * context's device VA space. On error, resources are unwound in reverse
 * order via the goto chain below.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int min_alloc_order, rc;

	/* Nothing to set up if the PMMU was not initialized */
	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
								HOST_SPACE_INTERNAL_CB_SZ,
								&hdev->internal_cb_pool_dma_addr,
								GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* Pool granularity: the smaller of the signal/wait CB sizes */
	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
					gaudi2_get_wait_cb_size(hdev)));

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev, "Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	/* Reserve device VA space for the pool and map it */
	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
10833 
/*
 * gaudi2_internal_cb_pool_fini() - tear down the per-context internal CB
 * pool created by gaudi2_internal_cb_pool_init().
 * @hdev: habanalabs device structure.
 * @ctx: context the pool belongs to.
 */
static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	/* Pool was never created if the PMMU was not initialized */
	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
		return;

	/* Unmap and unreserve the device VA range under the MMU lock */
	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
10852 
/* Restore all user-visible registers: sync manager resources and QMANs. */
static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}
10858 
gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx * ctx)10859 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10860 {
10861 	struct hl_device *hdev = ctx->hdev;
10862 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10863 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10864 	int rc;
10865 
10866 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10867 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10868 	if (rc)
10869 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10870 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10871 
10872 	return rc;
10873 }
10874 
gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx * ctx)10875 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10876 {
10877 	struct hl_device *hdev = ctx->hdev;
10878 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10879 	int rc;
10880 
10881 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10882 				prop->pmmu.page_size, true);
10883 	if (rc)
10884 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10885 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10886 }
10887 
gaudi2_ctx_init(struct hl_ctx * ctx)10888 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10889 {
10890 	int rc;
10891 
10892 	if (ctx->asid == HL_KERNEL_ASID_ID)
10893 		return 0;
10894 
10895 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10896 	if (rc)
10897 		return rc;
10898 
10899 	/* No need to clear user registers if the device has just
10900 	 * performed reset, we restore only nic qm registers
10901 	 */
10902 	if (ctx->hdev->reset_upon_device_release)
10903 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10904 	else
10905 		gaudi2_restore_user_registers(ctx->hdev);
10906 
10907 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10908 	if (rc)
10909 		return rc;
10910 
10911 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10912 	if (rc)
10913 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10914 
10915 	return rc;
10916 }
10917 
gaudi2_ctx_fini(struct hl_ctx * ctx)10918 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10919 {
10920 	if (ctx->asid == HL_KERNEL_ASID_ID)
10921 		return;
10922 
10923 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10924 
10925 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10926 }
10927 
gaudi2_pre_schedule_cs(struct hl_cs * cs)10928 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10929 {
10930 	struct hl_device *hdev = cs->ctx->hdev;
10931 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10932 	u32 mon_payload, sob_id, mon_id;
10933 
10934 	if (!cs_needs_completion(cs))
10935 		return 0;
10936 
10937 	/*
10938 	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
10939 	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
10940 	 * cyclic index. The SOB value is increased when each of the CS jobs is
10941 	 * completed. When the SOB reaches the number of CS jobs, the monitor
10942 	 * generates MSI-X interrupt.
10943 	 */
10944 
10945 	sob_id = mon_id = index;
10946 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10947 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10948 
10949 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10950 				cs->jobs_cnt);
10951 
10952 	return 0;
10953 }
10954 
/* Not supported on Gaudi2 - no queue is associated with a CQ index */
static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}
10959 
/*
 * Append a MSG_SHORT signal packet (SOB increment by 1, ADD mode) to the CB
 * at the given offset. Returns the new CB offset past the packet.
 */
static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
{
	struct hl_cb *cb = data;
	struct packet_msg_short *pkt =
			(struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
	u32 value, ctl;

	memset(pkt, 0, sizeof(*pkt));

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1) |
		FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4) |
	      FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1) | /* SOB base */
	      FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT) |
	      FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb) |
	      FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + sizeof(*pkt);
}
10984 
/*
 * Fill a MSG_SHORT packet that writes 'value' to a monitor register at
 * 'addr' (offset from the MON base). Returns the packet size in bytes.
 */
static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
{
	u32 ctl;

	memset(pkt, 0, sizeof(*pkt));

	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr) |
	      FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0) |  /* MON base */
	      FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT) |
	      FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0) |
	      FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return sizeof(*pkt);
}
11002 
gaudi2_add_arm_monitor_pkt(struct hl_device * hdev,struct packet_msg_short * pkt,u16 sob_base,u8 sob_mask,u16 sob_val,u16 addr)11003 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
11004 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
11005 {
11006 	u32 ctl, value, pkt_size = sizeof(*pkt);
11007 	u8 mask;
11008 
11009 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
11010 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
11011 		return 0;
11012 	}
11013 
11014 	memset(pkt, 0, pkt_size);
11015 
11016 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
11017 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
11018 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
11019 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
11020 
11021 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
11022 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
11023 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
11024 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11025 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11026 
11027 	pkt->value = cpu_to_le32(value);
11028 	pkt->ctl = cpu_to_le32(ctl);
11029 
11030 	return pkt_size;
11031 }
11032 
gaudi2_add_fence_pkt(struct packet_fence * pkt)11033 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
11034 {
11035 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
11036 
11037 	memset(pkt, 0, pkt_size);
11038 
11039 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
11040 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
11041 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
11042 
11043 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
11044 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11045 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11046 
11047 	pkt->cfg = cpu_to_le32(cfg);
11048 	pkt->ctl = cpu_to_le32(ctl);
11049 
11050 	return pkt_size;
11051 }
11052 
/*
 * Build a wait CB: four MSG_SHORT packets that configure and arm a monitor
 * (payload address low/high, payload data, arm) followed by a FENCE packet
 * that blocks the queue until the monitor fires. The monitor payload is a
 * write of 1 to the queue's FENCE2 register for the stream, which releases
 * the fence. Packet order matters: the monitor must be fully configured
 * before it is armed, and the fence must come last. Returns the new CB size.
 */
static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = prop->data;
	void *buf = (void *) (uintptr_t) (cb->kernel_address);

	u64 monitor_base, fence_addr = 0;
	u32 stream_index, size = prop->size;
	u16 msg_addr_offset;

	/* 4 streams per QMAN; each has its own FENCE2 register */
	stream_index = prop->q_idx % 4;
	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
			QM_FENCE2_OFFSET + stream_index * 4;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
				monitor_base;

	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;

	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
						prop->sob_val, msg_addr_offset);

	/* Fence packet */
	size += gaudi2_add_fence_pkt(buf + size);

	return size;
}
11104 
/* Zero a sync object's HW value and reset its refcount to 1 */
static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);

	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}
11115 
/* Intentionally empty - SOB groups are not used on Gaudi2 */
static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
}
11119 
gaudi2_get_device_time(struct hl_device * hdev)11120 static u64 gaudi2_get_device_time(struct hl_device *hdev)
11121 {
11122 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
11123 
11124 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
11125 }
11126 
/* Collective wait is not supported on Gaudi2 - nothing to initialize */
static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}
11131 
/* Collective wait is not supported on Gaudi2 - always reject */
static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
					struct hl_cs *cs, u32 wait_queue_id,
					u32 collective_engine_id, u32 encaps_signal_offset)
{
	return -EINVAL;
}
11138 
11139 /*
11140  * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
11141  *                   to DMMU page-size address (64MB) before mapping it in
11142  *                   the MMU.
11143  * The operation is performed on both the virtual and physical addresses.
11144  * for device with 6 HBMs the scramble is:
11145  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
11146  *
11147  * Example:
11148  * =============================================================================
11149  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
11150  * Phys address                                                     in MMU last
11151  *                                                                    HOP
11152  * =============================================================================
11153  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
11154  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
11155  * =============================================================================
11156  */
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
									VA_HBM_SPACE_END)) {

		/* real bytes backing one DMMU page (see scramble description above) */
		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
		/* keep the bits above the scramble window, place the quotient as the
		 * DMMU page index and the remainder as the in-page offset
		 */
		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
	}

	return raw_addr;
}
11176 
/* Inverse of gaudi2_mmu_scramble_addr(): recover the raw DRAM address */
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 divisor, mod_va;
	u64 div_va;

	/* accept any address in the DRAM address space */
	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
									VA_HBM_SPACE_END)) {

		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
		/* split into DMMU page index (quotient of the 64MB page size)
		 * and in-page offset, then rebuild the raw address
		 */
		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
					PAGE_SIZE_64MB, &mod_va);

		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
					(div_va * divisor + mod_va));
	}

	return scrambled_addr;
}
11197 
/*
 * Return the CMD block base address of a decoder core, or 0 for an invalid
 * core id. Cores 0-7 are the per-dcore decoders; the rest are the PCIe
 * shared decoders.
 */
static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
{
	u32 dcore_id, dec_id;

	if (core_id >= NUMBER_OF_DEC) {
		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
		return 0;
	}

	if (core_id >= 8) {
		/* PCIe Shared Decoder */
		return mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
	}

	dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
	dec_id = core_id % NUM_OF_DEC_PER_DCORE;

	return mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
}
11220 
/*
 * Resolve a user-visible HW block address to its index in the mapped-blocks
 * table. Optionally returns the block size. -EINVAL for unknown addresses.
 */
static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	int idx;

	for (idx = 0 ; idx < NUM_USER_MAPPED_BLOCKS ; idx++) {
		if (block_addr != CFG_BASE + gaudi2->mapped_blocks[idx].address)
			continue;

		*block_id = idx;
		if (block_size)
			*block_size = gaudi2->mapped_blocks[idx].size;

		return 0;
	}

	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);

	return -EINVAL;
}
11240 
/*
 * mmap a user-mappable HW block into the caller's VMA through the CFG BAR.
 * Only whole-block mappings are allowed.
 */
static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			u32 block_id, u32 block_size)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 bar_offset, phys_address;
	int rc;

	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
		dev_err(hdev->dev, "Invalid block id %u", block_id);
		return -EINVAL;
	}

	/* we allow mapping only an entire block */
	if (block_size != gaudi2->mapped_blocks[block_id].size) {
		dev_err(hdev->dev, "Invalid block size %u", block_size);
		return -EINVAL;
	}

	/* the BAR window starts at STM_FLASH_BASE_ADDR */
	bar_offset = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
	phys_address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + bar_offset;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

	rc = remap_pfn_range(vma, vma->vm_start, phys_address >> PAGE_SHIFT,
			block_size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

	return rc;
}
11274 
gaudi2_enable_events_from_fw(struct hl_device * hdev)11275 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
11276 {
11277 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11278 
11279 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
11280 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
11281 
11282 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
11283 		WREG32(irq_handler_offset,
11284 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
11285 }
11286 
/*
 * Translate an MMU HW-capability bit to the register base of that MMU.
 * Note: mmu_id is a single HW_CAP_* bit, not an index, so a switch is used
 * rather than a lookup table. Returns -EINVAL for an unknown bit.
 */
static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
{
	switch (mmu_id) {
	case HW_CAP_DCORE0_DMMU0:
		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU1:
		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU2:
		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE0_DMMU3:
		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU0:
		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU1:
		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU2:
		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE1_DMMU3:
		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU0:
		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU1:
		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU2:
		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE2_DMMU3:
		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU0:
		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU1:
		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU2:
		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
		break;
	case HW_CAP_DCORE3_DMMU3:
		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
		break;
	case HW_CAP_PMMU:
		*mmu_base = mmPMMU_HBW_MMU_BASE;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
11347 
/*
 * Handle (and thereby acknowledge) page-fault and access-violation state of
 * a single MMU. Skips MMUs that were never brought up.
 */
static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
{
	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u32 mmu_base;

	if (!(gaudi2->hw_cap_initialized & mmu_id))
		return;

	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
		return;

	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
}
11363 
/*
 * Acknowledge page-fault/access errors on every MMU selected by the
 * capability mask: all HMMUs first, then the PMMU. Always returns 0.
 */
static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	u32 hmmu_cnt = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
	u32 bit, mmu_id;

	/* check all HMMUs */
	for (bit = 0 ; bit < hmmu_cnt ; bit++) {
		mmu_id = HW_CAP_DCORE0_DMMU0 << bit;

		if (mmu_cap_mask & mmu_id)
			gaudi2_ack_mmu_error(hdev, mmu_id);
	}

	/* check PMMU */
	if (mmu_cap_mask & HW_CAP_PMMU)
		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);

	return 0;
}
11382 
/* Fill the MSI routing table sent to the firmware (CPU-endian -> LE32) */
static void gaudi2_get_msi_info(__le32 *table)
{
	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
	table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
}
11388 
/* Translate a uapi PLL index to the firmware's PLL numbering, -EINVAL if unknown */
static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI2_CPU_PLL:
		return CPU_PLL;
	case HL_GAUDI2_PCI_PLL:
		return PCI_PLL;
	case HL_GAUDI2_NIC_PLL:
		return NIC_PLL;
	case HL_GAUDI2_DMA_PLL:
		return DMA_PLL;
	case HL_GAUDI2_MESH_PLL:
		return MESH_PLL;
	case HL_GAUDI2_MME_PLL:
		return MME_PLL;
	case HL_GAUDI2_TPC_PLL:
		return TPC_PLL;
	case HL_GAUDI2_IF_PLL:
		return IF_PLL;
	case HL_GAUDI2_SRAM_PLL:
		return SRAM_PLL;
	case HL_GAUDI2_HBM_PLL:
		return HBM_PLL;
	case HL_GAUDI2_VID_PLL:
		return VID_PLL;
	case HL_GAUDI2_MSS_PLL:
		return MSS_PLL;
	default:
		return -EINVAL;
	}
}
11407 
/* State-dump stub: sync-to-engine mapping is not implemented on Gaudi2 */
static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}
11413 
/* State-dump stub: monitor validation is not implemented on Gaudi2 */
static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}
11419 
/* State-dump stub: monitor printing is not implemented on Gaudi2 */
static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}
11426 
11427 
/* State-dump stub: fence printing is not implemented on Gaudi2 */
static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
				u64 status_base_offset, enum hl_sync_engine_type engine_type,
				u32 engine_id, char **buf, size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}
11435 
11436 
/* State-dump callbacks; all are stubs - state dump is not implemented yet */
static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
	.monitor_valid = gaudi2_monitor_valid,
	.print_single_monitor = gaudi2_print_single_monitor,
	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi2_print_fences_single_engine,
};
11443 
/* Wire up the (stub) state-dump properties and callbacks */
static void gaudi2_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
}
11450 
/* Not supported on Gaudi2 - SOB address lookup always returns 0 */
static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return 0;
}
11455 
/* No stream-master queue list on Gaudi2 */
static u32 *gaudi2_get_stream_master_qid_arr(void)
{
	return NULL;
}
11460 
/* Register the common clock and voltage-regulator sysfs attribute groups */
static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}
11467 
/*
 * Determine the actual page size to use for an MMU mapping.
 *
 * Host (PMMU) mappings must be a multiple of the PMMU page size and use it
 * directly. DRAM mappings must be a multiple of the DRAM page size, which in
 * turn must not exceed the DMMU page size; the DRAM page size is used and the
 * scrambling routine bridges the DMMU/DRAM page-size mismatch.
 * Returns 0 on success, -EFAULT on a page-size mismatch.
 */
static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
					u32 page_size, u32 *real_page_size, bool is_dram_addr)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	/* for host pages the page size must be a multiple of the PMMU page size */
	if (!is_dram_addr) {
		if (page_size % mmu_prop->page_size)
			goto page_size_err;

		*real_page_size = mmu_prop->page_size;
		return 0;
	}

	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
		goto page_size_err;

	/*
	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
	 * than DRAM page size).
	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
	 * this mismatch when calculating the address to place in the MMU page table.
	 * (in that case also make sure that the dram_page_size is not greater than the
	 * mmu page size)
	 */
	*real_page_size = prop->dram_page_size;

	return 0;

page_size_err:
	/* print the MMU page size itself - the old message shifted it into KB
	 * while keeping a hex format, producing a misleading value
	 */
	dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n",
							page_size, mmu_prop->page_size);
	return -EFAULT;
}
11502 
/* Monitor dump is not supported on Gaudi2 */
static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}
11507 
/*
 * Notify the firmware of a device open/close. Quietly succeeds when the CPU
 * queue is down, since there is nobody to notify.
 */
int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
		return hl_fw_send_device_activity(hdev, open);

	return 0;
}
11517 
/*
 * Read a PTE from DRAM through the DRAM BAR window. Returns U64_MAX while a
 * hard reset is pending, since the BAR must not be touched then.
 */
static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
}
11530 
/*
 * Write a PTE to DRAM through the DRAM BAR window. Silently dropped while a
 * hard reset is pending, since the BAR must not be touched then.
 */
static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
}
11540 
11541 static const struct hl_asic_funcs gaudi2_funcs = {
11542 	.early_init = gaudi2_early_init,
11543 	.early_fini = gaudi2_early_fini,
11544 	.late_init = gaudi2_late_init,
11545 	.late_fini = gaudi2_late_fini,
11546 	.sw_init = gaudi2_sw_init,
11547 	.sw_fini = gaudi2_sw_fini,
11548 	.hw_init = gaudi2_hw_init,
11549 	.hw_fini = gaudi2_hw_fini,
11550 	.halt_engines = gaudi2_halt_engines,
11551 	.suspend = gaudi2_suspend,
11552 	.resume = gaudi2_resume,
11553 	.mmap = gaudi2_mmap,
11554 	.ring_doorbell = gaudi2_ring_doorbell,
11555 	.pqe_write = gaudi2_pqe_write,
11556 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11557 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
11558 	.scrub_device_mem = gaudi2_scrub_device_mem,
11559 	.scrub_device_dram = gaudi2_scrub_device_dram,
11560 	.get_int_queue_base = NULL,
11561 	.test_queues = gaudi2_test_queues,
11562 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11563 	.asic_dma_pool_free = gaudi2_dma_pool_free,
11564 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11565 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11566 	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
11567 	.cs_parser = gaudi2_cs_parser,
11568 	.dma_map_sgtable = hl_asic_dma_map_sgtable,
11569 	.add_end_of_cb_packets = NULL,
11570 	.update_eq_ci = gaudi2_update_eq_ci,
11571 	.context_switch = gaudi2_context_switch,
11572 	.restore_phase_topology = gaudi2_restore_phase_topology,
11573 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
11574 	.add_device_attr = gaudi2_add_device_attr,
11575 	.handle_eqe = gaudi2_handle_eqe,
11576 	.get_events_stat = gaudi2_get_events_stat,
11577 	.read_pte = gaudi2_read_pte,
11578 	.write_pte = gaudi2_write_pte,
11579 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11580 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11581 	.mmu_prefetch_cache_range = NULL,
11582 	.send_heartbeat = gaudi2_send_heartbeat,
11583 	.debug_coresight = gaudi2_debug_coresight,
11584 	.is_device_idle = gaudi2_is_device_idle,
11585 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
11586 	.hw_queues_lock = gaudi2_hw_queues_lock,
11587 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
11588 	.get_pci_id = gaudi2_get_pci_id,
11589 	.get_eeprom_data = gaudi2_get_eeprom_data,
11590 	.get_monitor_dump = gaudi2_get_monitor_dump,
11591 	.send_cpu_message = gaudi2_send_cpu_message,
11592 	.pci_bars_map = gaudi2_pci_bars_map,
11593 	.init_iatu = gaudi2_init_iatu,
11594 	.rreg = hl_rreg,
11595 	.wreg = hl_wreg,
11596 	.halt_coresight = gaudi2_halt_coresight,
11597 	.ctx_init = gaudi2_ctx_init,
11598 	.ctx_fini = gaudi2_ctx_fini,
11599 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
11600 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11601 	.load_firmware_to_device = NULL,
11602 	.load_boot_fit_to_device = NULL,
11603 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
11604 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
11605 	.gen_signal_cb = gaudi2_gen_signal_cb,
11606 	.gen_wait_cb = gaudi2_gen_wait_cb,
11607 	.reset_sob = gaudi2_reset_sob,
11608 	.reset_sob_group = gaudi2_reset_sob_group,
11609 	.get_device_time = gaudi2_get_device_time,
11610 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
11611 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11612 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11613 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
11614 	.scramble_addr = gaudi2_mmu_scramble_addr,
11615 	.descramble_addr = gaudi2_mmu_descramble_addr,
11616 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11617 	.get_hw_block_id = gaudi2_get_hw_block_id,
11618 	.hw_block_mmap = gaudi2_block_mmap,
11619 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
11620 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11621 	.get_msi_info = gaudi2_get_msi_info,
11622 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11623 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11624 	.init_firmware_loader = gaudi2_init_firmware_loader,
11625 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11626 	.state_dump_init = gaudi2_state_dump_init,
11627 	.get_sob_addr = &gaudi2_get_sob_addr,
11628 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11629 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11630 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11631 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11632 	.access_dev_mem = hl_access_dev_mem,
11633 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
11634 	.set_engine_cores = gaudi2_set_engine_cores,
11635 	.set_engines = gaudi2_set_engines,
11636 	.send_device_activity = gaudi2_send_device_activity,
11637 	.set_dram_properties = gaudi2_set_dram_properties,
11638 	.set_binning_masks = gaudi2_set_binning_masks,
11639 };
11640 
/* Install the Gaudi2 dispatch table on the device (called from common code) */
void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}
11645