1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
23 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
24 
25 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
26 
27 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
28 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
29 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
30 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
31 #define GAUDI2_RESET_POLL_CNT			3
32 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
33 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
34 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
35 #define GAUDI2_CB_POOL_CB_CNT			512
36 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
37 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
38 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
39 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
40 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
41 
42 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
43 
/*
 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
 * and relies on that value (for array sizes etc.), we define another value for the
 * maximum number of faulty TPCs which reflects the cluster binning requirements.
 */
49 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
50 #define MAX_FAULTY_XBARS			1
51 #define MAX_FAULTY_EDMAS			1
52 #define MAX_FAULTY_DECODERS			1
53 
54 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
55 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
56 #define GAUDI2_DECODER_FULL_MASK		0x3FF
57 
58 #define GAUDI2_NA_EVENT_CAUSE			0xFF
59 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
60 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
61 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
63 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
64 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
65 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
66 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
67 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
68 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
69 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
70 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
71 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
72 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
73 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
74 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
75 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
76 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
78 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
79 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
80 
81 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
82 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
83 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
84 
85 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
86 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
87 
88 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
89 
90 #define IS_DMA_IDLE(dma_core_sts0)	\
91 	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
92 
93 #define IS_DMA_HALTED(dma_core_sts1)	\
94 	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
95 
96 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
97 
98 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
99 
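/*
 * A QMAN is considered idle only when its global status (qm_glbl_sts0), its ARC
 * status (qm_glbl_sts1) and its CGM status all match their respective idle masks.
 */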
100 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
101 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
102 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
103 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
104 
105 #define PCIE_DEC_EN_MASK			0x300
106 #define DEC_WORK_STATE_IDLE			0
107 #define DEC_WORK_STATE_PEND			3
108 #define IS_DEC_IDLE(dec_swreg15) \
109 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
111 
112 /* HBM MMU address scrambling parameters */
113 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
114 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
115 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
116 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
117 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
118 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
119 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
120 #define MMU_RANGE_INV_EN_SHIFT			0
121 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
122 #define MMU_RANGE_INV_ASID_SHIFT		2
123 
/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU
 * because it has a 2-entry FIFO, and hence it is not enabled for it.
 */
127 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
128 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
129 
130 #define GAUDI2_MAX_STRING_LEN			64
131 
132 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
133 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
134 
135 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
136 
137 /* RAZWI initiator coordinates */
138 #define RAZWI_GET_AXUSER_XY(x) \
139 	((x & 0xF8001FF0) >> 4)
140 
141 #define RAZWI_GET_AXUSER_LOW_XY(x) \
142 	((x & 0x00001FF0) >> 4)
143 
144 #define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
145 #define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
146 #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
147 #define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF
148 
149 #define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
150 #define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F
151 
152 #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
153 	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
154 		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
155 
156 #define RAZWI_INITIATOR_ID_X_HIGH(x) \
157 		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
158 
159 #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
160 	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
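
/*
 * For example, RAZWI_INITIATOR_ID_X_Y(2, 4, 0) places the low X coordinate (2) in
 * bits [4:0], the low Y coordinate (4) in bits [8:5] and the high X coordinate (0)
 * in bits [27:23], yielding 0x82.
 */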
161 
162 #define PSOC_RAZWI_ENG_STR_SIZE 128
163 #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
164 
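/**
 * struct gaudi2_razwi_info - RAZWI initiator descriptor.
 * @axuser_xy: packed X/Y coordinates of the initiator, as built by
 *             RAZWI_INITIATOR_ID_X_Y()/RAZWI_INITIATOR_ID_X_Y_LOW().
 * @rtr_ctrl: base address of the RTR control block associated with the initiator.
 * @eng_id: engine ID of the initiator, or GAUDI2_ENGINE_ID_SIZE when the initiator
 *          is not a user-visible engine (e.g. PMMU, PCIE, HMMU).
 * @eng_name: human-readable name of the initiator.
 */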
165 struct gaudi2_razwi_info {
166 	u32 axuser_xy;
167 	u32 rtr_ctrl;
168 	u16 eng_id;
169 	char *eng_name;
170 };
171 
172 static struct gaudi2_razwi_info common_razwi_info[] = {
173 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
174 				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
175 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
176 				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
177 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
178 				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
179 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
180 				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
181 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
182 				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
183 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
184 				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
185 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
186 				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
187 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
188 				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
189 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
190 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
191 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
				GAUDI2_PCIE_ENGINE_ID_DEC_1, "DEC9"},
193 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
194 				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
195 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
196 				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
197 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
198 				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
199 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
200 				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
201 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
202 				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
203 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
204 				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
205 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
206 				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
207 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
208 				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
209 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
210 				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
211 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
212 				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
213 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
214 				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
215 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
216 				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
217 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
218 				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
219 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
220 				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
221 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
222 				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
223 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
224 				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
225 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
226 				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
227 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
228 				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
229 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
230 				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
231 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
232 				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
233 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
234 				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
235 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
236 				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
237 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
238 				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
239 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
240 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
241 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
				GAUDI2_DCORE0_ENGINE_ID_TPC_6, "TPC24"},
243 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
244 				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
245 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
246 				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
247 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
248 				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
249 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
250 				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
251 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
252 				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
253 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
254 				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
255 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
256 				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
257 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
258 				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
259 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
260 				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
261 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
262 				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
263 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
264 				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
265 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
266 				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
267 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
268 				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
269 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
270 				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
271 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
272 				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
273 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
274 				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
275 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
276 				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
277 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
278 				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
279 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
280 				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
281 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
282 				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
283 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
284 				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
285 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
286 				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
287 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
288 				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
289 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
290 				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF0_RTR_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF1_RTR_CTRL_BASE,
				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
295 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
296 				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
297 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
298 				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
299 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
301 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
303 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
305 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
307 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
309 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
311 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
312 				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
313 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
314 				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
315 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
317 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
319 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
321 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
323 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
325 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
327 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
328 				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
329 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
330 				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
331 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 				GAUDI2_ENGINE_ID_PSOC, "CPU"},
333 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
334 				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
335 };
336 
337 static struct gaudi2_razwi_info mme_razwi_info[] = {
338 		/* MME X high coordinate is N/A, hence using only low coordinates */
339 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
340 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
341 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
342 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
343 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
344 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
345 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
346 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
347 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
348 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
349 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
350 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
351 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
352 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
353 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
354 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
355 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
356 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
357 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
358 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
359 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
360 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
361 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
362 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
363 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
364 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
365 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
366 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
367 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
368 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
369 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
370 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
371 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
372 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
373 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
374 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
375 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
376 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
377 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
378 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
379 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
380 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
381 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
382 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
383 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
384 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
385 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
386 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
387 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
388 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
389 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
390 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
391 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
392 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
393 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
394 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
395 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
396 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
397 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
398 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
399 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
400 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
401 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
402 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
403 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
404 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
405 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
406 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
407 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
408 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
409 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
410 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
411 };
412 
413 enum hl_pmmu_fatal_cause {
414 	LATENCY_RD_OUT_FIFO_OVERRUN,
415 	LATENCY_WR_OUT_FIFO_OVERRUN,
416 };
417 
418 enum hl_pcie_drain_ind_cause {
419 	LBW_AXI_DRAIN_IND,
420 	HBW_AXI_DRAIN_IND
421 };
422 
423 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
424 	[HBM_ID0] = 0xFFFC,
425 	[HBM_ID1] = 0xFFCF,
426 	[HBM_ID2] = 0xF7F7,
427 	[HBM_ID3] = 0x7F7F,
428 	[HBM_ID4] = 0xFCFF,
429 	[HBM_ID5] = 0xCFFF,
430 };
431 
432 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
433 	[0] = HBM_ID0,
434 	[1] = HBM_ID1,
435 	[2] = HBM_ID4,
436 	[3] = HBM_ID5,
437 };
438 
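/* HBM cluster associated with each EDMA instance */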
439 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
440 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
441 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
442 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
443 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
444 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
445 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
446 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
447 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
448 };
449 
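/*
 * Async event ID reported by the QMAN driving each H/W queue. All four queues of
 * a given QMAN map to the same event.
 */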
450 static const int gaudi2_qman_async_event_id[] = {
451 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
452 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
453 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
454 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
455 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
456 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
457 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
458 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
459 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
460 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
461 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
462 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
463 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
464 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
465 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
466 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
467 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
468 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
469 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
470 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
471 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
472 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
473 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
474 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
475 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
476 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
477 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
478 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
479 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
480 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
481 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
482 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
483 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
484 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
485 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
486 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
487 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
488 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
489 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
490 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
491 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
492 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
493 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
494 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
495 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
496 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
497 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
498 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
499 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
500 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
501 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
502 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
503 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
504 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
505 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
506 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
507 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
508 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
509 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
510 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
511 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
512 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
513 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
514 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
515 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
516 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
517 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
518 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
519 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
520 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
521 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
522 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
523 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
524 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
525 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
526 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
527 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
528 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
529 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
530 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
531 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
532 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
533 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
534 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
535 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
536 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
537 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
538 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
539 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
540 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
541 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
542 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
543 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
544 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
545 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
546 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
547 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
548 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
549 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
550 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
551 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
552 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
553 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
554 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
555 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
556 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
557 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
558 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
559 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
560 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
561 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
562 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
563 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
564 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
565 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
566 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
567 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
568 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
569 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
570 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
571 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
572 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
573 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
574 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
575 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
576 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
577 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
578 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
579 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
580 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
581 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
582 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
583 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
584 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
585 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
586 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
587 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
588 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
589 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
590 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
591 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
592 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
593 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
594 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
595 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
596 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
597 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
598 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
599 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
600 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
601 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
602 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
603 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
604 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
605 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
606 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
607 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
608 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
609 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
610 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
611 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
612 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
613 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
614 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
615 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
616 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
617 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
618 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
619 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
620 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
621 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
622 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
623 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
624 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
625 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
626 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
627 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
628 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
629 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
630 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
631 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
632 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
633 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
634 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
635 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
636 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
637 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
638 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
639 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
640 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
641 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
642 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
643 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
644 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
645 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
646 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
647 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
648 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
649 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
650 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
651 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
652 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
653 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
654 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
655 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
656 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
657 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
658 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
659 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
660 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
661 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
662 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
663 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
664 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
665 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
666 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
667 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
668 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
669 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
670 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
671 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
672 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
673 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
674 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
675 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
676 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
677 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
678 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
679 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
680 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
681 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
682 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
683 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
684 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
685 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
686 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
687 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
688 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
689 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
690 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
691 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
692 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
693 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
694 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
695 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
696 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
697 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
698 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
699 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
700 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
701 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
702 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
703 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
704 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
705 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
706 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
708 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
709 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
710 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
711 };
712 
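/* Async event ID associated with each DMA core */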
713 static const int gaudi2_dma_core_async_event_id[] = {
714 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
715 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
716 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
717 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
718 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
719 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
720 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
721 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
722 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
723 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
724 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
725 };
726 
727 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
728 	"qman sei intr",
729 	"arc sei intr"
730 };
731 
732 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
733 	"AXI_TERMINATOR WR",
734 	"AXI_TERMINATOR RD",
735 	"AXI SPLIT SEI Status"
736 };
737 
738 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
739 	"cbu_bresp_sei_intr_cause",
740 	"cbu_rresp_sei_intr_cause",
741 	"lbu_bresp_sei_intr_cause",
742 	"lbu_rresp_sei_intr_cause",
743 	"cbu_axi_split_intr_cause",
744 	"lbu_axi_split_intr_cause",
745 	"arc_ip_excptn_sei_intr_cause",
746 	"dmi_bresp_sei_intr_cause",
747 	"aux2apb_err_sei_intr_cause",
748 	"cfg_lbw_wr_terminated_intr_cause",
749 	"cfg_lbw_rd_terminated_intr_cause",
750 	"cfg_dccm_wr_terminated_intr_cause",
751 	"cfg_dccm_rd_terminated_intr_cause",
752 	"cfg_hbw_rd_terminated_intr_cause"
753 };
754 
755 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
756 	"msix_vcd_hbw_sei",
757 	"msix_l2c_hbw_sei",
758 	"msix_nrm_hbw_sei",
759 	"msix_abnrm_hbw_sei",
760 	"msix_vcd_lbw_sei",
761 	"msix_l2c_lbw_sei",
762 	"msix_nrm_lbw_sei",
763 	"msix_abnrm_lbw_sei",
764 	"apb_vcd_lbw_sei",
765 	"apb_l2c_lbw_sei",
766 	"apb_nrm_lbw_sei",
767 	"apb_abnrm_lbw_sei",
768 	"dec_sei",
769 	"dec_apb_sei",
770 	"trc_apb_sei",
771 	"lbw_mstr_if_sei",
772 	"axi_split_bresp_err_sei",
773 	"hbw_axi_wr_viol_sei",
774 	"hbw_axi_rd_viol_sei",
775 	"lbw_axi_wr_viol_sei",
776 	"lbw_axi_rd_viol_sei",
777 	"vcd_spi",
778 	"l2c_spi",
779 	"nrm_spi",
780 	"abnrm_spi",
781 };
782 
783 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
784 	"PQ AXI HBW error",
785 	"CQ AXI HBW error",
786 	"CP AXI HBW error",
787 	"CP error due to undefined OPCODE",
788 	"CP encountered STOP OPCODE",
789 	"CP AXI LBW error",
790 	"CP WRREG32 or WRBULK returned error",
791 	"N/A",
792 	"FENCE 0 inc over max value and clipped",
793 	"FENCE 1 inc over max value and clipped",
794 	"FENCE 2 inc over max value and clipped",
795 	"FENCE 3 inc over max value and clipped",
796 	"FENCE 0 dec under min value and clipped",
797 	"FENCE 1 dec under min value and clipped",
798 	"FENCE 2 dec under min value and clipped",
799 	"FENCE 3 dec under min value and clipped",
800 	"CPDMA Up overflow",
801 	"PQC L2H error"
802 };
803 
804 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
805 	"RSVD0",
806 	"CQ AXI HBW error",
807 	"CP AXI HBW error",
808 	"CP error due to undefined OPCODE",
809 	"CP encountered STOP OPCODE",
810 	"CP AXI LBW error",
811 	"CP WRREG32 or WRBULK returned error",
812 	"N/A",
813 	"FENCE 0 inc over max value and clipped",
814 	"FENCE 1 inc over max value and clipped",
815 	"FENCE 2 inc over max value and clipped",
816 	"FENCE 3 inc over max value and clipped",
817 	"FENCE 0 dec under min value and clipped",
818 	"FENCE 1 dec under min value and clipped",
819 	"FENCE 2 dec under min value and clipped",
820 	"FENCE 3 dec under min value and clipped",
821 	"CPDMA Up overflow",
822 	"RSVD17",
823 	"CQ_WR_IFIFO_CI_ERR",
824 	"CQ_WR_CTL_CI_ERR",
825 	"ARC_CQF_RD_ERR",
826 	"ARC_CQ_WR_IFIFO_CI_ERR",
827 	"ARC_CQ_WR_CTL_CI_ERR",
828 	"ARC_AXI_ERR",
829 	"CP_SWITCH_WDT_ERR"
830 };
831 
832 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
833 	"Choice push while full error",
834 	"Choice Q watchdog error",
835 	"MSG AXI LBW returned with error"
836 };
837 
838 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
839 	"qm_axi_err",
840 	"qm_trace_fence_events",
841 	"qm_sw_err",
842 	"qm_cp_sw_stop",
843 	"lbw_mstr_rresp_err",
844 	"lbw_mstr_bresp_err",
845 	"lbw_msg_slverr",
846 	"hbw_msg_slverr",
847 	"wbc_slverr",
848 	"hbw_mstr_rresp_err",
849 	"hbw_mstr_bresp_err",
850 	"sb_resp_intr",
851 	"mrsb_resp_intr",
852 	"core_dw_status_0",
853 	"core_dw_status_1",
854 	"core_dw_status_2",
855 	"core_dw_status_3",
856 	"core_dw_status_4",
857 	"core_dw_status_5",
858 	"core_dw_status_6",
859 	"core_dw_status_7",
860 	"async_arc2cpu_sei_intr",
861 };
862 
863 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
864 	"tpc_address_exceed_slm",
865 	"tpc_div_by_0",
866 	"tpc_spu_mac_overflow",
867 	"tpc_spu_addsub_overflow",
868 	"tpc_spu_abs_overflow",
869 	"tpc_spu_fma_fp_dst_nan",
870 	"tpc_spu_fma_fp_dst_inf",
871 	"tpc_spu_convert_fp_dst_nan",
872 	"tpc_spu_convert_fp_dst_inf",
873 	"tpc_spu_fp_dst_denorm",
874 	"tpc_vpu_mac_overflow",
875 	"tpc_vpu_addsub_overflow",
876 	"tpc_vpu_abs_overflow",
877 	"tpc_vpu_convert_fp_dst_nan",
878 	"tpc_vpu_convert_fp_dst_inf",
879 	"tpc_vpu_fma_fp_dst_nan",
880 	"tpc_vpu_fma_fp_dst_inf",
881 	"tpc_vpu_fp_dst_denorm",
882 	"tpc_assertions",
883 	"tpc_illegal_instruction",
884 	"tpc_pc_wrap_around",
885 	"tpc_qm_sw_err",
886 	"tpc_hbw_rresp_err",
887 	"tpc_hbw_bresp_err",
888 	"tpc_lbw_rresp_err",
889 	"tpc_lbw_bresp_err",
890 	"st_unlock_already_locked",
891 	"invalid_lock_access",
892 	"LD_L protection violation",
893 	"ST_L protection violation",
894 };
895 
896 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
897 	"agu_resp_intr",
898 	"qman_axi_err",
899 	"wap sei (wbc axi err)",
900 	"arc sei",
901 	"cfg access error",
902 	"qm_sw_err",
903 	"sbte_dbg_intr_0",
904 	"sbte_dbg_intr_1",
905 	"sbte_dbg_intr_2",
906 	"sbte_dbg_intr_3",
907 	"sbte_dbg_intr_4",
908 	"sbte_prtn_intr_0",
909 	"sbte_prtn_intr_1",
910 	"sbte_prtn_intr_2",
911 	"sbte_prtn_intr_3",
912 	"sbte_prtn_intr_4",
913 };
914 
915 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
916 	"i0",
917 	"i1",
918 	"i2",
919 	"i3",
920 	"i4",
921 };
922 
923 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
924 	"WBC ERR RESP_0",
925 	"WBC ERR RESP_1",
926 	"AP SOURCE POS INF",
927 	"AP SOURCE NEG INF",
928 	"AP SOURCE NAN",
929 	"AP RESULT POS INF",
930 	"AP RESULT NEG INF",
931 };
932 
933 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
934 	"HBW Read returned with error RRESP",
935 	"HBW write returned with error BRESP",
936 	"LBW write returned with error BRESP",
937 	"descriptor_fifo_overflow",
938 	"KDMA SB LBW Read returned with error",
939 	"KDMA WBC LBW Write returned with error",
940 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
941 	"WRONG CFG FOR COMMIT IN LIN DMA"
942 };
943 
944 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
945 	"HBW/LBW Read returned with error RRESP",
946 	"HBW/LBW write returned with error BRESP",
947 	"LBW write returned with error BRESP",
948 	"descriptor_fifo_overflow",
949 	"KDMA SB LBW Read returned with error",
950 	"KDMA WBC LBW Write returned with error",
951 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
952 	"WRONG CFG FOR COMMIT IN LIN DMA"
953 };
954 
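/**
 * struct gaudi2_sm_sei_cause_data - sync manager SEI error cause description.
 * @cause_name: human-readable description of the error cause.
 * @log_name: meaning of the value logged together with the cause (e.g. "SOB ID").
 */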
955 struct gaudi2_sm_sei_cause_data {
956 	const char *cause_name;
957 	const char *log_name;
958 };
959 
960 static const struct gaudi2_sm_sei_cause_data
961 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
962 	{"calculated SO value overflow/underflow", "SOB ID"},
963 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
964 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
965 };
966 
967 static const char * const
968 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
969 	"LATENCY_RD_OUT_FIFO_OVERRUN",
970 	"LATENCY_WR_OUT_FIFO_OVERRUN",
971 };
972 
973 static const char * const
974 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
975 	"LATENCY_RD_OUT_FIFO_OVERRUN",
976 	"LATENCY_WR_OUT_FIFO_OVERRUN",
977 };
978 
979 static const char * const
980 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
981 	"AXI drain HBW",
982 	"AXI drain LBW",
983 };
984 
985 static const char * const
986 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
987 	"HBW error response",
988 	"LBW error response",
989 	"TLP is blocked by RR"
990 };
991 
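/* Configuration-space base address of the QMAN block driving each H/W queue */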
992 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
993 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
994 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
995 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
996 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
997 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
998 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
999 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1000 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1001 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1002 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1003 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1004 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1005 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1006 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1007 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1008 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1009 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1010 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1011 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1012 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1013 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1014 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1015 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1016 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1017 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1018 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1019 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1020 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1021 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1022 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1023 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1024 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1025 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1026 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1027 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1028 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1029 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1030 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1031 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1032 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1033 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1034 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1035 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1036 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1037 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1038 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1039 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1040 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1041 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1042 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1043 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1044 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1045 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1046 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1047 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1048 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1049 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1050 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1051 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1052 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1053 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1054 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1055 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1056 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1057 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1058 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1059 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1060 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1061 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1062 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1063 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1064 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1065 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1066 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1067 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1068 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1069 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1070 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1071 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1072 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1073 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1074 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1075 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1076 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1077 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1078 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1079 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1080 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1081 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1082 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1083 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1084 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1085 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1086 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1087 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1088 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1089 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1090 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1091 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1092 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1093 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1094 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1095 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1096 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1097 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1098 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1099 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1100 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1101 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1102 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1103 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1104 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1105 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1106 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1107 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1108 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1109 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1110 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1111 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1112 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1113 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1114 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1115 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1116 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1117 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1118 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1119 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1120 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1121 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1122 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1123 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1124 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1125 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1126 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1127 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1128 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1129 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1130 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1131 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1132 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1133 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1134 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1135 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1136 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1137 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1138 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1139 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1140 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1141 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1142 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1143 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1144 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1145 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1146 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1147 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1148 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1149 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1150 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1151 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1152 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1153 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1154 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1155 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1156 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1157 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1158 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1159 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1160 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1161 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1162 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1163 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1164 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1165 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1166 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1167 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1168 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1169 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1170 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1171 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1172 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1173 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1174 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1175 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1176 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1177 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1178 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1179 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1180 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1181 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1182 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1183 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1184 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1185 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1186 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1187 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1188 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1189 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1190 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1191 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1192 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1193 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1194 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1195 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1196 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1197 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1198 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1199 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1200 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1201 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1202 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1203 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1204 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1205 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1206 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1207 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1208 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1209 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1210 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1211 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1212 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1213 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1214 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1215 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1216 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1217 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1218 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1219 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1220 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1221 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1222 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1223 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1224 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1225 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1226 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1227 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1228 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1229 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1230 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1231 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1232 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1233 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1234 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1235 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1236 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1237 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1238 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1239 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1240 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1241 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1242 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1243 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1244 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1245 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1246 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1247 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1248 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1249 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1250 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1251 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1252 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1253 };
1254 
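/* AUX register block base address of each ARC CPU (scheduler and QMAN ARCs) */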
1255 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1256 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1257 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1258 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1259 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1260 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1261 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1262 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1263 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1264 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1265 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1266 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1267 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1268 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1269 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1270 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1271 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1272 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1273 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1274 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1275 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1276 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1277 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1278 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1279 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1280 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1281 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1282 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1283 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1284 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1285 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1286 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1287 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1288 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1289 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1290 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1291 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1292 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1293 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1294 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1295 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1296 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1297 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1298 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1299 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1300 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1301 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1302 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1303 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1304 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1305 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1306 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1307 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1308 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1309 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1310 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1311 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1312 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1313 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1314 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1315 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1316 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1317 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1318 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1319 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1320 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1321 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1322 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1323 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1324 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1325 };
1326 
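/* DCCM (local data memory) base address of each ARC CPU */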
1327 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1328 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1329 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1330 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1331 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1332 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1333 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1334 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1335 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1336 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1337 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1338 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1339 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1340 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1341 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1342 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1343 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1344 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1345 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1346 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1347 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1348 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1349 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1350 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1351 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1352 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1353 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1354 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1355 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1356 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1357 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1358 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1359 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1360 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1361 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1362 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1363 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1364 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1365 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1366 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1367 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1368 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1369 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1370 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1371 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1372 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1373 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1374 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1375 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1376 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1377 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1378 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1379 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1380 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1381 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1382 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1383 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1384 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1385 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1386 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1387 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1388 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1389 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1390 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1391 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1392 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1393 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1394 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1395 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1396 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1397 };
1398 
1399 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1400 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1401 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1402 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1403 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1404 };
1405 
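/* ARC CPU serving each HW queue; all four streams of a QMAN map to the same ARC */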
1406 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1407 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1408 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1409 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1410 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1411 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1412 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1413 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1414 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1415 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1416 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1417 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1418 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1419 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1420 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1421 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1422 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1423 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1424 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1425 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1426 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1427 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1428 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1429 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1430 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1431 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1432 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1433 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1434 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1435 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1436 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1437 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1438 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1439 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1440 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1441 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1442 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1443 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1444 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1445 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1446 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1447 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1448 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1449 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1450 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1451 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1452 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1453 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1454 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1455 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1456 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1457 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1458 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1459 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1460 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1461 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1462 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1463 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1464 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1465 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1466 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1467 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1468 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1469 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1470 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1471 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1472 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1473 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1474 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1475 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1476 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1477 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1478 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1479 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1480 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1481 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1482 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1483 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1484 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1485 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1486 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1487 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1488 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1489 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1490 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1491 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1492 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1493 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1494 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1495 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1496 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1497 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1498 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1499 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1500 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1501 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1502 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1503 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1504 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1505 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1506 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1507 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1508 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1509 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1510 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1511 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1512 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1513 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1514 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1515 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1516 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1517 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1518 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1519 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1520 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1521 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1522 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1523 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1524 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1525 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1526 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1527 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1528 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1529 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1530 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1531 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1532 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1533 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1534 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1535 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1536 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1537 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1538 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1539 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1540 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1541 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1542 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1543 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1544 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1545 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1546 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1547 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1548 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1549 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1550 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1551 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1552 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1553 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1554 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1555 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1556 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1557 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1558 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1559 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1560 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1561 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1562 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1563 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1564 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1565 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1566 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1567 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1568 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1569 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1570 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1571 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1572 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1573 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1574 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1575 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1576 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1577 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1578 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1579 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1580 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1581 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1582 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1583 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1584 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1585 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1586 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1587 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1588 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1589 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1590 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1591 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1592 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1593 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1594 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1595 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1596 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1597 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1598 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1599 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1600 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1601 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1602 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1603 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1604 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1605 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1606 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1607 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1608 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1609 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1610 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1611 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1612 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1613 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1614 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1615 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1616 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1617 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1618 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1619 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1620 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1621 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1622 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1623 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1624 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1625 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1626 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1627 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1628 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1629 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1630 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1631 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1632 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1633 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1634 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1635 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1636 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1637 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1638 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1639 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1640 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1641 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1642 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1643 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1644 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1645 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1646 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1647 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1648 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1649 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1650 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1651 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1652 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1653 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1654 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1655 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1656 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1657 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1658 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1659 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1660 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1661 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1662 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1663 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1664 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1665 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1666 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1667 };
1668 
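/* Core register block base address of each DMA engine: PDMAs, EDMAs (two per dcore) and KDMA */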
1669 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1670 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1671 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1672 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1673 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1674 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1675 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1676 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1677 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1678 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1679 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1680 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1681 };
1682 
1683 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1684 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1685 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1686 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1687 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1688 };
1689 
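/* CFG block base address of each TPC, with the PCI TPC (DCORE0_TPC6) listed last */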
1690 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1691 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1692 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1693 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1694 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1695 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1696 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1697 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1698 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1699 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1700 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1701 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1702 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1703 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1704 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1705 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1706 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1707 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1708 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1709 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1710 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1711 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1712 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1713 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1714 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1715 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1716 };
1717 
1718 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1719 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1720 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1721 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1722 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1723 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1724 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1725 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1726 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1727 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1728 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1729 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1730 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1731 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1732 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1733 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1734 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1735 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1736 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1737 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1738 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1739 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1740 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1741 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1742 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1743 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1744 };
1745 
1746 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1747 	[ROTATOR_ID_0] = mmROT0_BASE,
1748 	[ROTATOR_ID_1] = mmROT1_BASE
1749 };
1750 
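/* First (stream 0) HW queue ID of each TPC QMAN */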
1751 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1752 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1753 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1754 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1755 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1756 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1757 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1758 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1759 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1760 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1761 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1762 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1763 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1764 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1765 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1766 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1767 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1768 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1769 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1770 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1771 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1772 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1773 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1774 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1775 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1776 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1777 };
1778 
1779 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1780 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1781 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1782 };
1783 
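/* Translation from GAUDI2_*_ENGINE_ID_TPC_* values to internal TPC_ID_* indices */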
1784 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1785 	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1786 	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1787 	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1788 	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1789 	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1790 	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1791 	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1792 	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1793 	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1794 	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1795 	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1796 	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1797 	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1798 	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1799 	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1800 	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1801 	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1802 	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1803 	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1804 	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1805 	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1806 	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1807 	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1808 	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like the HW) */
1810 	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1811 };
1812 
1813 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1814 	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1815 	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1816 	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1817 	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1818 };
1819 
1820 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1821 	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1822 	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1823 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1824 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1825 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1826 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1827 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1828 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1829 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1830 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1831 	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1832 };
1833 
1834 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1835 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1836 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1837 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1838 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1839 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1840 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1841 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1842 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1843 };
1844 
1845 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1846 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1847 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1848 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1849 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1850 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1851 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1852 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1853 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1854 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1855 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1856 };
1857 
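/* Router (RTR) instance IDs, eight per dcore */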
1858 enum rtr_id {
1859 	DCORE0_RTR0,
1860 	DCORE0_RTR1,
1861 	DCORE0_RTR2,
1862 	DCORE0_RTR3,
1863 	DCORE0_RTR4,
1864 	DCORE0_RTR5,
1865 	DCORE0_RTR6,
1866 	DCORE0_RTR7,
1867 	DCORE1_RTR0,
1868 	DCORE1_RTR1,
1869 	DCORE1_RTR2,
1870 	DCORE1_RTR3,
1871 	DCORE1_RTR4,
1872 	DCORE1_RTR5,
1873 	DCORE1_RTR6,
1874 	DCORE1_RTR7,
1875 	DCORE2_RTR0,
1876 	DCORE2_RTR1,
1877 	DCORE2_RTR2,
1878 	DCORE2_RTR3,
1879 	DCORE2_RTR4,
1880 	DCORE2_RTR5,
1881 	DCORE2_RTR6,
1882 	DCORE2_RTR7,
1883 	DCORE3_RTR0,
1884 	DCORE3_RTR1,
1885 	DCORE3_RTR2,
1886 	DCORE3_RTR3,
1887 	DCORE3_RTR4,
1888 	DCORE3_RTR5,
1889 	DCORE3_RTR6,
1890 	DCORE3_RTR7,
1891 };
1892 
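/*
 * HBW router serving each TPC initiator; adjacent TPC pairs share a router and the
 * last entry is the PCI TPC (DCORE0_TPC6).
 */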
1893 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1894 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1895 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1896 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1897 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1898 	DCORE0_RTR0
1899 };
1900 
1901 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1902 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
1903 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
1904 	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
1905 	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
1906 	DCORE0_RTR0
1907 };
1908 
1909 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
1910 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1911 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1912 };
1913 
1914 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
1915 	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
1916 	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
1917 };
1918 
1919 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1920 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1921 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1922 };
1923 
1924 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1925 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1926 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1927 };
1928 
1929 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1930 	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1931 	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1932 	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1933 	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1934 	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1935 	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1936 	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1937 	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
1938 };
1939 
1940 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
1941 	DCORE0_RTR0, DCORE0_RTR0
1942 };
1943 
1944 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
1945 	DCORE0_RTR2, DCORE0_RTR2
1946 };
1947 
1948 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
1949 	DCORE2_RTR0, DCORE3_RTR7
1950 };
1951 
1952 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
1953 	DCORE2_RTR2, DCORE3_RTR5
1954 };
1955 
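/* Router IDs of the individual initiators (WAPs, write, read, SBTEs) of a single MME */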
1956 struct mme_initiators_rtr_id {
1957 	u32 wap0;
1958 	u32 wap1;
1959 	u32 write;
1960 	u32 read;
1961 	u32 sbte0;
1962 	u32 sbte1;
1963 	u32 sbte2;
1964 	u32 sbte3;
1965 	u32 sbte4;
1966 };
1967 
1968 enum mme_initiators {
1969 	MME_WAP0 = 0,
1970 	MME_WAP1,
1971 	MME_WRITE,
1972 	MME_READ,
1973 	MME_SBTE0,
1974 	MME_SBTE1,
1975 	MME_SBTE2,
1976 	MME_SBTE3,
1977 	MME_SBTE4,
1978 	MME_INITIATORS_MAX
1979 };
1980 
1981 static const struct mme_initiators_rtr_id
1982 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1983 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1984 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1985 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1986 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1987 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1988 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1989 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1990 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1991 };
1992 
1993 enum razwi_event_sources {
1994 	RAZWI_TPC,
1995 	RAZWI_MME,
1996 	RAZWI_EDMA,
1997 	RAZWI_PDMA,
1998 	RAZWI_NIC,
1999 	RAZWI_DEC,
2000 	RAZWI_ROT
2001 };
2002 
2003 struct hbm_mc_error_causes {
2004 	u32 mask;
2005 	char cause[50];
2006 };
2007 
2008 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2009 
/* The special blocks iterator is currently used to configure the security protection
 * bits and to read global errors. Most HW blocks are addressable; those that aren't
 * (N/A) must be skipped. The following configurations are used for both the PB
 * configuration and the global error reading, since they currently share the same
 * settings. Once that changes, separate configurations must be used for each one.
 */
2016 static int gaudi2_iterator_skip_block_types[] = {
2017 		GAUDI2_BLOCK_TYPE_PLL,
2018 		GAUDI2_BLOCK_TYPE_EU_BIST,
2019 		GAUDI2_BLOCK_TYPE_HBM,
2020 		GAUDI2_BLOCK_TYPE_XFT
2021 };
2022 
2023 static struct range gaudi2_iterator_skip_block_ranges[] = {
2024 		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2025 		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2026 		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2027 		/* Skip all CPU blocks except for CPU_IF */
2028 		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2029 		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2030 };
2031 
2032 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2033 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2034 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2035 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2036 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2037 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2038 };
2039 
2040 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2041 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2042 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2043 	[HBM_SEI_READ_ERR] = "SEI read data error",
2044 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2045 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2046 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2047 	[HBM_SEI_DFI] = "SEI DFI error",
2048 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2049 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2050 };
2051 
2052 struct mmu_spi_sei_cause {
2053 	char cause[50];
2054 	int clear_bit;
2055 };
2056 
2057 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2058 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
2059 	{"page access", 1},		/* INTERRUPT_CLR[1] */
2060 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
2061 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
2062 	{"mmu rei0", -1},		/* no clear register bit */
2063 	{"mmu rei1", -1},		/* no clear register bit */
2064 	{"stlb rei0", -1},		/* no clear register bit */
2065 	{"stlb rei1", -1},		/* no clear register bit */
2066 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
2067 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
2068 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
2069 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
2070 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2071 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2072 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2073 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2074 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
2075 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
2076 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
2077 };
2078 
2079 struct gaudi2_cache_invld_params {
2080 	u64 start_va;
2081 	u64 end_va;
2082 	u32 inv_start_val;
2083 	u32 flags;
2084 	bool range_invalidation;
2085 };
2086 
2087 struct gaudi2_tpc_idle_data {
2088 	struct engines_data *e;
2089 	unsigned long *mask;
2090 	bool *is_idle;
2091 	const char *tpc_fmt;
2092 };
2093 
2094 struct gaudi2_tpc_mmu_data {
2095 	u32 rw_asid;
2096 };
2097 
2098 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2099 
2100 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2101 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2102 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2103 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2104 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2105 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2106 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2107 										bool is_memset);
2108 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2109 		struct engines_data *e);
2110 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2111 		struct engines_data *e);
2112 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2113 		struct engines_data *e);
2114 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2115 
2116 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2117 {
2118 
2119 }
2120 
2121 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2122 {
2123 	return sizeof(struct packet_msg_short);
2124 }
2125 
2126 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2127 {
2128 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2129 }
2130 
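/*
 * Invoke ctx->fn() for every enabled TPC, including the PCI TPC (DCORE0_TPC6),
 * passing each instance's register offset. Iteration stops on the first error
 * reported through ctx->rc.
 */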
2131 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2132 {
2133 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2134 	int dcore, inst, tpc_seq;
2135 	u32 offset;
2136 
2137 	/* init the return code */
2138 	ctx->rc = 0;
2139 
2140 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2141 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2142 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2143 
2144 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2145 				continue;
2146 
2147 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2148 
2149 			ctx->fn(hdev, dcore, inst, offset, ctx);
2150 			if (ctx->rc) {
2151 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2152 							dcore, inst);
2153 				return;
2154 			}
2155 		}
2156 	}
2157 
2158 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2159 		return;
2160 
	/* The PCI TPC (DCORE0_TPC6) is handled separately, after the per-dcore loop */
2162 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2163 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2164 	if (ctx->rc)
2165 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2166 }
2167 
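/*
 * A host physical address is valid if it falls below the end of the first host
 * physical range or at/above the start of the second one; addresses in the gap
 * between the two ranges are considered invalid.
 */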
2168 static bool gaudi2_host_phys_addr_valid(u64 addr)
2169 {
2170 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2171 		return true;
2172 
2173 	return false;
2174 }
2175 
2176 static int set_number_of_functional_hbms(struct hl_device *hdev)
2177 {
2178 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2179 	u8 faulty_hbms = hweight64(hdev->dram_binning);
2180 
2181 	/* check if all HBMs should be used */
2182 	if (!faulty_hbms) {
		dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
2184 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2185 		return 0;
2186 	}
2187 
	/*
	 * Check for the error condition in which the number of binning candidates
	 * is higher than the maximum supported by the driver, in which case the
	 * supplied binning mask is rejected.
	 */
2194 	if (faulty_hbms > MAX_FAULTY_HBMS) {
2195 		dev_err(hdev->dev,
2196 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2197 			MAX_FAULTY_HBMS, hdev->dram_binning);
2198 		return -EINVAL;
2199 	}
2200 
	/*
	 * With binning in effect, the number of functional HBMs is reduced by the
	 * number of faulty ones (by default this is always GAUDI2_HBM_NUM - 1).
	 */
2205 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2206 	return 0;
2207 }
2208 
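/*
 * Derive the DRAM (HBM) related properties - page size, total size and the DMMU
 * virtual range - from the number of functional HBMs.
 */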
2209 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2210 {
2211 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2212 	u32 basic_hbm_page_size;
2213 	int rc;
2214 
2215 	rc = set_number_of_functional_hbms(hdev);
2216 	if (rc)
2217 		return -EINVAL;
2218 
	/*
	 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use a
	 * workaround in which the page size is x16 bigger, so that the entire HBM
	 * mapping can be populated in the TLB.
	 */
2224 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2225 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2226 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2227 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2228 	prop->dram_base_address = DRAM_PHYS_BASE;
2229 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2230 	prop->dram_supports_virtual_memory = true;
2231 
2232 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2233 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2234 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2235 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2236 
	/* Since the DRAM page size differs from the DMMU page size, we need to allocate
	 * DRAM memory in units of DRAM page size and map this memory in units of DMMU
	 * page size. We overcome this size mismatch using a scrambling routine which
	 * takes a DRAM page and converts it to a DMMU page.
	 * We therefore:
	 * 1. partition the virtual address space into DRAM-page (whole) pages.
	 *    (suppose we get n such pages)
	 * 2. limit the amount of virtual address space we got from 1 above to
	 *    a multiple of 64M, as we don't want the scrambled address to cross
	 *    the DRAM virtual address space.
	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
	 * 3. determine the end address accordingly:
	 *    end_addr = start_addr + m * 48M
	 *
	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
	 */
2254 	prop->dmmu.start_addr = prop->dram_base_address +
2255 			(prop->dram_page_size *
2256 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2257 
2258 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2259 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2260 
2261 	return 0;
2262 }
2263 
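/*
 * Set the fixed (compile-time known) ASIC properties: HW queue properties, SRAM
 * layout, PMMU/DMMU parameters and the reserved VA ranges.
 */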
2264 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2265 {
2266 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2267 	struct hw_queue_properties *q_props;
2268 	u32 num_sync_stream_queues = 0;
2269 	int i;
2270 
2271 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2272 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2273 					GFP_KERNEL);
2274 
2275 	if (!prop->hw_queues_props)
2276 		return -ENOMEM;
2277 
2278 	q_props = prop->hw_queues_props;
2279 
2280 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2281 		q_props[i].type = QUEUE_TYPE_HW;
2282 		q_props[i].driver_only = 0;
2283 
2284 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2285 			q_props[i].supports_sync_stream = 0;
2286 		} else {
2287 			q_props[i].supports_sync_stream = 1;
2288 			num_sync_stream_queues++;
2289 		}
2290 
2291 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2292 	}
2293 
2294 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2295 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2296 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2297 
2298 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2299 	prop->cfg_base_address = CFG_BASE;
2300 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2301 	prop->host_base_address = HOST_PHYS_BASE_0;
2302 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2303 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2304 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2305 	prop->user_dec_intr_count = NUMBER_OF_DEC;
2306 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2307 	prop->completion_mode = HL_COMPLETION_MODE_CS;
2308 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2309 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2310 
2311 	prop->sram_base_address = SRAM_BASE_ADDR;
2312 	prop->sram_size = SRAM_SIZE;
2313 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2314 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2315 
2316 	prop->hints_range_reservation = true;
2317 
2318 	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2319 
2320 	if (hdev->pldm)
2321 		prop->mmu_pgt_size = 0x800000; /* 8MB */
2322 	else
2323 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2324 
2325 	prop->mmu_pte_size = HL_PTE_SIZE;
2326 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2327 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2328 
2329 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2330 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2331 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2332 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2333 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2334 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2335 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2336 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2337 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2338 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2339 	prop->dmmu.page_size = PAGE_SIZE_1GB;
2340 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2341 	prop->dmmu.last_mask = LAST_MASK;
2342 	prop->dmmu.host_resident = 1;
2343 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2344 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2345 
2346 	/*
2347 	 * This is done in order to be able to validate the FW descriptor (i.e. validate that
2348 	 * the addresses and the space allocated for the FW image do not cross memory bounds).
2349 	 * For this reason we set the DRAM size to the minimum possible, and later it will
2350 	 * be modified according to what is reported in the cpucp info packet.
2351 	 */
2352 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2353 
2354 	hdev->pmmu_huge_range = true;
2355 	prop->pmmu.host_resident = 1;
2356 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2357 	prop->pmmu.last_mask = LAST_MASK;
2358 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2359 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2360 
2361 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2362 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2363 	prop->hints_host_hpage_reserved_va_range.start_addr =
2364 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2365 	prop->hints_host_hpage_reserved_va_range.end_addr =
2366 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2367 
2368 	if (PAGE_SIZE == SZ_64K) {
2369 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2370 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2371 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2372 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2373 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2374 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2375 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2376 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2377 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2378 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2379 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2380 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2381 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2382 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2383 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2384 
2385 		/* shifts and masks are the same in PMMU and HPMMU */
2386 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2387 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2388 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2389 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2390 	} else {
2391 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2392 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2393 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2394 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2395 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2396 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2397 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2398 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2399 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2400 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2401 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2402 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2403 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2404 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2405 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2406 
2407 		/* shifts and masks are the same in PMMU and HPMMU */
2408 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2409 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2410 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2411 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2412 	}
2413 
2414 	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2415 	prop->num_engine_cores = CPU_ID_MAX;
2416 	prop->cfg_size = CFG_SIZE;
2417 	prop->max_asid = MAX_ASID;
2418 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2419 
2420 	prop->supports_engine_modes = true;
2421 
2422 	prop->dc_power_default = DC_POWER_DEFAULT;
2423 
2424 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2425 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2426 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2427 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2428 
2429 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2430 
2431 	prop->mme_master_slave_mode = 1;
2432 
2433 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2434 					(num_sync_stream_queues * HL_RSVD_SOBS);
2435 
2436 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2437 					(num_sync_stream_queues * HL_RSVD_MONS);
2438 
2439 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2440 	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2441 	prop->unexpected_user_error_interrupt_id = GAUDI2_IRQ_NUM_UNEXPECTED_ERROR;
2442 
2443 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2444 
2445 	prop->fw_cpu_boot_dev_sts0_valid = false;
2446 	prop->fw_cpu_boot_dev_sts1_valid = false;
2447 	prop->hard_reset_done_by_fw = false;
2448 	prop->gic_interrupts_enable = true;
2449 
2450 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2451 
2452 	prop->max_dec = NUMBER_OF_DEC;
2453 
2454 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2455 
2456 	prop->dma_mask = 64;
2457 
2458 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2459 
2460 	return 0;
2461 }
2462 
2463 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2464 {
2465 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2466 	bool is_wc[3] = {false, false, true};
2467 	int rc;
2468 
2469 	rc = hl_pci_bars_map(hdev, name, is_wc);
2470 	if (rc)
2471 		return rc;
2472 
2473 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2474 
2475 	return 0;
2476 }
2477 
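/*
 * gaudi2_set_hbm_bar_base - move the DRAM BAR (inbound region 2) to point at @addr.
 *
 * Returns the previous BAR base address so the caller can restore it, or U64_MAX
 * if the BAR cannot be moved (iATU is owned by the FW) or if reconfiguration fails.
 * No register access is done when the BAR already points at @addr.
 */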
2478 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2479 {
2480 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2481 	struct hl_inbound_pci_region pci_region;
2482 	u64 old_addr = addr;
2483 	int rc;
2484 
2485 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2486 		return old_addr;
2487 
2488 	if (hdev->asic_prop.iatu_done_by_fw)
2489 		return U64_MAX;
2490 
2491 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2492 	pci_region.mode = PCI_BAR_MATCH_MODE;
2493 	pci_region.bar = DRAM_BAR_ID;
2494 	pci_region.addr = addr;
2495 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2496 	if (rc)
2497 		return U64_MAX;
2498 
2499 	if (gaudi2) {
2500 		old_addr = gaudi2->dram_bar_cur_addr;
2501 		gaudi2->dram_bar_cur_addr = addr;
2502 	}
2503 
2504 	return old_addr;
2505 }
2506 
2507 static int gaudi2_init_iatu(struct hl_device *hdev)
2508 {
2509 	struct hl_inbound_pci_region inbound_region;
2510 	struct hl_outbound_pci_region outbound_region;
2511 	u32 bar_addr_low, bar_addr_high;
2512 	int rc;
2513 
2514 	if (hdev->asic_prop.iatu_done_by_fw)
2515 		return 0;
2516 
2517 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2518 	 * We must map this region in BAR match mode in order to
2519 	 * fetch BAR physical base address
2520 	 */
2521 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2522 	inbound_region.bar = SRAM_CFG_BAR_ID;
2523 	/* Base address must be aligned to Bar size which is 256 MB */
2524 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2525 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2526 	if (rc)
2527 		return rc;
2528 
2529 	/* Fetch physical BAR address */
2530 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2531 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2532 
2533 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2534 
2535 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2536 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2537 	inbound_region.bar = SRAM_CFG_BAR_ID;
2538 	inbound_region.offset_in_bar = 0;
2539 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2540 	inbound_region.size = CFG_REGION_SIZE;
2541 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2542 	if (rc)
2543 		return rc;
2544 
2545 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2546 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2547 	inbound_region.bar = SRAM_CFG_BAR_ID;
2548 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2549 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2550 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2551 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2552 	if (rc)
2553 		return rc;
2554 
2555 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2556 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2557 	inbound_region.bar = DRAM_BAR_ID;
2558 	inbound_region.addr = DRAM_PHYS_BASE;
2559 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2560 	if (rc)
2561 		return rc;
2562 
2563 	/* Outbound Region 0 - Point to Host */
2564 	outbound_region.addr = HOST_PHYS_BASE_0;
2565 	outbound_region.size = HOST_PHYS_SIZE_0;
2566 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2567 
2568 	return rc;
2569 }
2570 
2571 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2572 {
2573 	return RREG32(mmHW_STATE);
2574 }
2575 
2576 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2577 {
2578 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2579 
2580 	/*
2581 	 * Check for the error condition in which the number of binning
2582 	 * candidates is higher than the maximum supported by the driver
2583 	 */
2584 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2585 		dev_err(hdev->dev, "TPC binning is supported for a maximum of %d faulty TPCs, provided mask 0x%llx\n",
2586 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2587 					hdev->tpc_binning);
2588 		return -EINVAL;
2589 	}
2590 
2591 	prop->tpc_binning_mask = hdev->tpc_binning;
2592 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2593 
2594 	return 0;
2595 }
2596 
2597 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2598 {
2599 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2600 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2601 	u64 tpc_binning_mask;
2602 	u8 subst_idx = 0;
2603 	int i, rc;
2604 
2605 	rc = gaudi2_tpc_binning_init_prop(hdev);
2606 	if (rc)
2607 		return rc;
2608 
2609 	tpc_binning_mask = prop->tpc_binning_mask;
2610 
2611 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2612 		u8 subst_seq, binned, qid_base;
2613 
2614 		if (tpc_binning_mask == 0)
2615 			break;
2616 
2617 		if (subst_idx == 0) {
2618 			subst_seq = TPC_ID_DCORE0_TPC6;
2619 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2620 		} else {
2621 			subst_seq = TPC_ID_DCORE3_TPC5;
2622 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2623 		}
2624 
2626 		/* clear bit from mask */
2627 		binned = __ffs(tpc_binning_mask);
2628 		/*
2629 		 * Coverity complains about possible out-of-bound access in
2630 		 * clear_bit
2631 		 */
2632 		if (binned >= TPC_ID_SIZE) {
2633 			dev_err(hdev->dev,
2634 				"Invalid binned TPC (binning mask: %llx)\n",
2635 				tpc_binning_mask);
2636 			return -EINVAL;
2637 		}
2638 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2639 
2640 		/* also clear replacing TPC bit from enabled mask */
2641 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2642 
2643 		/* bin the substitute TPC's queues */
2644 		q_props[qid_base].binned = 1;
2645 		q_props[qid_base + 1].binned = 1;
2646 		q_props[qid_base + 2].binned = 1;
2647 		q_props[qid_base + 3].binned = 1;
2648 
2649 		subst_idx++;
2650 	}
2651 
2652 	return 0;
2653 }
2654 
2655 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2656 {
2657 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2658 	u8 num_faulty;
2659 
2660 	num_faulty = hweight32(hdev->decoder_binning);
2661 
2662 	/*
2663 	 * Check for the error condition in which the number of binning
2664 	 * candidates is higher than the maximum supported by the driver
2665 	 */
2666 	if (num_faulty > MAX_FAULTY_DECODERS) {
2667 		dev_err(hdev->dev, "decoder binning is supported for a maximum of one faulty decoder, provided mask 0x%x\n",
2668 						hdev->decoder_binning);
2669 		return -EINVAL;
2670 	}
2671 
2672 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2673 
2674 	if (prop->decoder_binning_mask)
2675 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2676 	else
2677 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2678 
2679 	return 0;
2680 }
2681 
2682 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2683 {
2684 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2685 
2686 	/* check if we should override default binning */
2687 	if (!hdev->dram_binning) {
2688 		prop->dram_binning_mask = 0;
2689 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2690 		return;
2691 	}
2692 
2693 	/* set DRAM binning constraints */
2694 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2695 	prop->dram_binning_mask = hdev->dram_binning;
2696 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2697 }
2698 
2699 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2700 {
2701 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2702 	struct hw_queue_properties *q_props;
2703 	u8 seq, num_faulty;
2704 
2705 	num_faulty = hweight32(hdev->edma_binning);
2706 
2707 	/*
2708 	 * Check for the error condition in which the number of binning
2709 	 * candidates is higher than the maximum supported by the driver
2710 	 */
2711 	if (num_faulty > MAX_FAULTY_EDMAS) {
2712 		dev_err(hdev->dev,
2713 			"EDMA binning is supported for a maximum of one faulty EDMA, provided mask 0x%x\n",
2714 			hdev->edma_binning);
2715 		return -EINVAL;
2716 	}
2717 
2718 	if (!hdev->edma_binning) {
2719 		prop->edma_binning_mask = 0;
2720 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2721 		return 0;
2722 	}
2723 
2724 	seq = __ffs((unsigned long)hdev->edma_binning);
2725 
2726 	/* set binning constraints */
2727 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2728 	prop->edma_binning_mask = hdev->edma_binning;
2729 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2730 
2731 	/* bin substitute EDMA's queue */
2732 	q_props = prop->hw_queues_props;
2733 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2734 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2735 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2736 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2737 
2738 	return 0;
2739 }
2740 
2741 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2742 {
2743 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2744 	u8 num_faulty, seq;
2745 
2746 	/* check if we should override default binning */
2747 	if (!xbar_edge_iso_mask) {
2748 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2749 		return 0;
2750 	}
2751 
2752 	/*
2753 	 * Note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2754 	 * only the FW can set a redundancy value). For the user it will always be 0.
2755 	 */
2756 	num_faulty = hweight32(xbar_edge_iso_mask);
2757 
2758 	/*
2759 	 * Check for the error condition in which the number of binning
2760 	 * candidates is higher than the maximum supported by the driver
2761 	 */
2762 	if (num_faulty > MAX_FAULTY_XBARS) {
2763 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGEs\n",
2764 									MAX_FAULTY_XBARS);
2765 		return -EINVAL;
2766 	}
2767 
2768 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2769 
2770 	/* set binning constraints */
2771 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2772 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2773 
2774 	return 0;
2775 }
2776 
2777 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2778 {
2779 	int rc;
2780 
2781 	/*
2782 	 * Mark all clusters as good; each component will "fail" a cluster
2783 	 * based on eFuse/user values.
2784 	 * If more than a single cluster is faulty, the chip is unusable.
2785 	 */
2786 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2787 
2788 	gaudi2_set_dram_binning_masks(hdev);
2789 
2790 	rc = gaudi2_set_edma_binning_masks(hdev);
2791 	if (rc)
2792 		return rc;
2793 
2794 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2795 	if (rc)
2796 		return rc;
2797 
2799 	/* always initially set to full mask */
2800 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2801 
2802 	return 0;
2803 }
2804 
2805 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2806 {
2807 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2808 	int rc;
2809 
2810 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2811 	if (rc)
2812 		return rc;
2813 
2814 	/* if the FW reported DRAM binning we should perform the cluster configuration */
2815 	if (prop->faulty_dram_cluster_map) {
2816 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2817 
2818 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2819 	}
2820 
2821 	return 0;
2822 }
2823 
2824 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2825 {
2826 	int rc;
2827 
2828 	rc = gaudi2_set_cluster_binning_masks(hdev);
2829 	if (rc)
2830 		return rc;
2831 
2832 	rc = gaudi2_set_tpc_binning_masks(hdev);
2833 	if (rc)
2834 		return rc;
2835 
2836 	rc = gaudi2_set_dec_binning_masks(hdev);
2837 	if (rc)
2838 		return rc;
2839 
2840 	return 0;
2841 }
2842 
2843 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2844 {
2845 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2846 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2847 	long max_power;
2848 	u64 dram_size;
2849 	int rc;
2850 
2851 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2852 		return 0;
2853 
2854 	/* No point in asking for this information again when not doing a hard reset, as the
2855 	 * device CPU hasn't been reset
2856 	 */
2857 	if (hdev->reset_info.in_compute_reset)
2858 		return 0;
2859 
2860 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2861 										mmCPU_BOOT_ERR1);
2862 	if (rc)
2863 		return rc;
2864 
2865 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2866 	if (dram_size) {
2867 		/* We can have either 5 or 6 HBMs. Other values are invalid */
2868 
2869 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2870 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2871 			dev_err(hdev->dev,
2872 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2873 				dram_size, prop->dram_size);
2874 			dram_size = prop->dram_size;
2875 		}
2876 
2877 		prop->dram_size = dram_size;
2878 		prop->dram_end_address = prop->dram_base_address + dram_size;
2879 	}
2880 
2881 	if (!strlen(prop->cpucp_info.card_name))
2882 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2883 
2884 	/* Overwrite binning masks with the actual binning values from F/W */
2885 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2886 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2887 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2888 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2889 
2890 	/*
2891 	 * at this point the DRAM parameters need to be updated according to data obtained
2892 	 * from the FW
2893 	 */
2894 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2895 	if (rc)
2896 		return rc;
2897 
2898 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2899 	if (rc)
2900 		return rc;
2901 
2902 	max_power = hl_fw_get_max_power(hdev);
2903 	if (max_power < 0)
2904 		return max_power;
2905 
2906 	prop->max_power_default = (u64) max_power;
2907 
2908 	return 0;
2909 }
2910 
2911 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2912 {
2913 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2914 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2915 	int rc;
2916 
2917 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2918 		return 0;
2919 
2920 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2921 	if (rc)
2922 		return rc;
2923 
2924 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2925 
2926 	return 0;
2927 }
2928 
2929 static int gaudi2_early_init(struct hl_device *hdev)
2930 {
2931 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2932 	struct pci_dev *pdev = hdev->pdev;
2933 	resource_size_t pci_bar_size;
2934 	int rc;
2935 
2936 	rc = gaudi2_set_fixed_properties(hdev);
2937 	if (rc)
2938 		return rc;
2939 
2940 	/* Check BAR sizes */
2941 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2942 
2943 	if (pci_bar_size != CFG_BAR_SIZE) {
2944 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2945 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2946 		rc = -ENODEV;
2947 		goto free_queue_props;
2948 	}
2949 
2950 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2951 	if (pci_bar_size != MSIX_BAR_SIZE) {
2952 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2953 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2954 		rc = -ENODEV;
2955 		goto free_queue_props;
2956 	}
2957 
2958 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2959 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2960 
2961 	/*
2962 	 * The iATU is configured by the driver only in pldm; otherwise it is configured by the FW
2963 	 */
2964 	if (hdev->pldm)
2965 		hdev->asic_prop.iatu_done_by_fw = false;
2966 	else
2967 		hdev->asic_prop.iatu_done_by_fw = true;
2968 
2969 	rc = hl_pci_init(hdev);
2970 	if (rc)
2971 		goto free_queue_props;
2972 
2973 	/* Before continuing in the initialization, we need to read the preboot
2974 	 * version to determine whether we run with a security-enabled firmware
2975 	 */
2976 	rc = hl_fw_read_preboot_status(hdev);
2977 	if (rc) {
2978 		if (hdev->reset_on_preboot_fail)
2979 			/* we are already on failure flow, so don't check if hw_fini fails. */
2980 			hdev->asic_funcs->hw_fini(hdev, true, false);
2981 		goto pci_fini;
2982 	}
2983 
2984 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2985 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2986 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2987 		if (rc) {
2988 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2989 			goto pci_fini;
2990 		}
2991 	}
2992 
2993 	return 0;
2994 
2995 pci_fini:
2996 	hl_pci_fini(hdev);
2997 free_queue_props:
2998 	kfree(hdev->asic_prop.hw_queues_props);
2999 	return rc;
3000 }
3001 
3002 static int gaudi2_early_fini(struct hl_device *hdev)
3003 {
3004 	kfree(hdev->asic_prop.hw_queues_props);
3005 	hl_pci_fini(hdev);
3006 
3007 	return 0;
3008 }
3009 
3010 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3011 {
3012 	switch (arc_id) {
3013 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3014 		return true;
3015 	default:
3016 		return false;
3017 	}
3018 }
3019 
3020 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3021 {
3022 	switch (arc_id) {
3023 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3024 		return true;
3025 	default:
3026 		return false;
3027 	}
3028 }
3029 
3030 static void gaudi2_init_arcs(struct hl_device *hdev)
3031 {
3032 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3033 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3034 	u64 arc_id;
3035 	u32 i;
3036 
3037 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3038 		if (gaudi2_is_arc_enabled(hdev, i))
3039 			continue;
3040 
3041 		gaudi2_set_arc_id_cap(hdev, i);
3042 	}
3043 
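	/*
	 * Mark the ARC of every active, driver-owned QMAN as enabled. The stride
	 * of 4 assumes each QMAN exposes 4 PQs, so 4 consecutive queue IDs map to
	 * the same QMAN/ARC.
	 */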
3044 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3045 		if (!gaudi2_is_queue_enabled(hdev, i))
3046 			continue;
3047 
3048 		arc_id = gaudi2_queue_id_to_arc_id[i];
3049 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3050 			continue;
3051 
3052 		if (gaudi2_is_arc_nic_owned(arc_id) &&
3053 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3054 			continue;
3055 
3056 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3057 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3058 			continue;
3059 
3060 		gaudi2_set_arc_id_cap(hdev, arc_id);
3061 	}
3062 
3063 	/* Fetch the engine-core interrupt register address from the dynamic regs */
3064 	hdev->asic_prop.engine_core_interrupt_reg_addr =
3065 		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3066 }
3067 
3068 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3069 {
3070 	u32 reg_base, reg_val;
3071 	int rc;
3072 
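	/*
	 * Scrub the ARC DCCM memory using KDMA. Scheduler ARCs 0-3 own two
	 * consecutive DCCM blocks that are scrubbed in a single KDMA job, while
	 * scheduler ARCs 4-5 and the MME QMAN ARCs own two banked blocks that are
	 * selected via the UPPER_DCCM_EN register. All other ARCs own a single block.
	 */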
3073 	switch (cpu_id) {
3074 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3075 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
3076 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3077 						ARC_DCCM_BLOCK_SIZE * 2, true);
3078 		if (rc)
3079 			return rc;
3080 		break;
3081 	case CPU_ID_SCHED_ARC4:
3082 	case CPU_ID_SCHED_ARC5:
3083 	case CPU_ID_MME_QMAN_ARC0:
3084 	case CPU_ID_MME_QMAN_ARC1:
3085 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
3086 
3087 		/* Scrub lower DCCM block */
3088 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3089 						ARC_DCCM_BLOCK_SIZE, true);
3090 		if (rc)
3091 			return rc;
3092 
3093 		/* Switch to upper DCCM block */
3094 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3095 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3096 
3097 		/* Scrub upper DCCM block */
3098 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3099 						ARC_DCCM_BLOCK_SIZE, true);
3100 		if (rc)
3101 			return rc;
3102 
3103 		/* Switch to lower DCCM block */
3104 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3105 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3106 		break;
3107 	default:
3108 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3109 						ARC_DCCM_BLOCK_SIZE, true);
3110 		if (rc)
3111 			return rc;
3112 	}
3113 
3114 	return 0;
3115 }
3116 
3117 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3118 {
3119 	u16 arc_id;
3120 	int rc;
3121 
3122 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3123 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3124 			continue;
3125 
3126 		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3127 		if (rc)
3128 			return rc;
3129 	}
3130 
3131 	return 0;
3132 }
3133 
3134 static int gaudi2_late_init(struct hl_device *hdev)
3135 {
3136 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3137 	int rc;
3138 
3139 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
3140 
3141 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3142 					gaudi2->virt_msix_db_dma_addr);
3143 	if (rc) {
3144 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3145 		return rc;
3146 	}
3147 
3148 	rc = gaudi2_fetch_psoc_frequency(hdev);
3149 	if (rc) {
3150 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3151 		goto disable_pci_access;
3152 	}
3153 
3154 	gaudi2_init_arcs(hdev);
3155 
3156 	rc = gaudi2_scrub_arcs_dccm(hdev);
3157 	if (rc) {
3158 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3159 		goto disable_pci_access;
3160 	}
3161 
3162 	gaudi2_init_security(hdev);
3163 
3164 	return 0;
3165 
3166 disable_pci_access:
3167 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3168 
3169 	return rc;
3170 }
3171 
3172 static void gaudi2_late_fini(struct hl_device *hdev)
3173 {
3174 	hl_hwmon_release_resources(hdev);
3175 }
3176 
3177 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3178 {
3179 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3180 
3181 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3182 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3183 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3184 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3185 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3186 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3187 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3188 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3189 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3190 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3191 }
3192 
3193 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3194 {
3195 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3196 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3197 	u32 block_size, umr_start_idx, num_umr_blocks;
3198 	int i;
3199 
3200 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3201 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3202 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
3203 		else
3204 			block_size = ARC_DCCM_BLOCK_SIZE;
3205 
3206 		blocks[i].address = gaudi2_arc_dccm_bases[i];
3207 		blocks[i].size = block_size;
3208 	}
3209 
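	/*
	 * The ARC DCCM blocks are followed by the user-mapped ACP engine blocks
	 * (ARC farm ARCs 0-3 and the four DCORE MME QM ARCs). These 8 entries are
	 * expected to match NUM_OF_USER_ACP_BLOCKS, which is used below to compute
	 * the UMR blocks start index.
	 */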
3210 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3211 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3212 
3213 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3214 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3215 
3216 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3217 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3218 
3219 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3220 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3221 
3222 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3223 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3224 
3225 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3226 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3227 
3228 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3229 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3230 
3231 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3232 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3233 
3234 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3235 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
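	/*
	 * Each NIC engine exposes NUM_OF_USER_NIC_UMR_BLOCKS unsecured doorbell (UMR)
	 * blocks. The block address is decomposed into the NIC macro offset, the QMAN
	 * offset within the macro and the UMR block offset within the QMAN.
	 */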
3236 	for (i = 0 ; i < num_umr_blocks ; i++) {
3237 		u8 nic_id, umr_block_id;
3238 
3239 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3240 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3241 
3242 		blocks[umr_start_idx + i].address =
3243 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3244 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3245 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3246 			umr_block_id * NIC_UMR_OFFSET;
3247 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3248 	}
3249 
3250 	/* Expose decoder HW configuration block to user */
3251 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3252 
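	/* Expose the sync manager blocks of DCORE1-3 to the user. DCORE0 is skipped,
	 * presumably because its sync manager objects are reserved for driver use.
	 */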
3253 	for (i = 1; i < NUM_OF_DCORES; ++i) {
3254 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3255 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3256 
3257 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3258 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3259 
3260 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3261 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3262 	}
3263 }
3264 
3265 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3266 {
3267 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3268 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3269 	int i, j, rc = 0;
3270 
3271 	/* The device ARC works with 32-bit addresses, and because there is a single HW register
3272 	 * that holds the extension bits (49..28), these bits must be identical across the entire
3273 	 * allocated range.
3274 	 */
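	/*
	 * In other words, assuming GAUDI2_ARC_PCI_MSB_ADDR() extracts bits [49:28],
	 * the allocation must not cross a 256 MB boundary. We retry the allocation a
	 * few times until this holds, and keep only the successful one.
	 */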
3275 
3276 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3277 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3278 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3279 		if (!virt_addr_arr[i]) {
3280 			rc = -ENOMEM;
3281 			goto free_dma_mem_arr;
3282 		}
3283 
3284 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3285 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3286 			break;
3287 	}
3288 
3289 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3290 		dev_err(hdev->dev,
3291 			"MSBs of the ARC accessible DMA memory are not identical across the entire range\n");
3292 		rc = -EFAULT;
3293 		goto free_dma_mem_arr;
3294 	}
3295 
3296 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3297 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3298 
3299 free_dma_mem_arr:
3300 	for (j = 0 ; j < i ; j++)
3301 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3302 						dma_addr_arr[j]);
3303 
3304 	return rc;
3305 }
3306 
3307 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3308 {
3309 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3310 	struct pci_mem_region *region;
3311 
3312 	/* CFG */
3313 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3314 	region->region_base = CFG_BASE;
3315 	region->region_size = CFG_SIZE;
3316 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3317 	region->bar_size = CFG_BAR_SIZE;
3318 	region->bar_id = SRAM_CFG_BAR_ID;
3319 	region->used = 1;
3320 
3321 	/* SRAM */
3322 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3323 	region->region_base = SRAM_BASE_ADDR;
3324 	region->region_size = SRAM_SIZE;
3325 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3326 	region->bar_size = CFG_BAR_SIZE;
3327 	region->bar_id = SRAM_CFG_BAR_ID;
3328 	region->used = 1;
3329 
3330 	/* DRAM */
3331 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3332 	region->region_base = DRAM_PHYS_BASE;
3333 	region->region_size = hdev->asic_prop.dram_size;
3334 	region->offset_in_bar = 0;
3335 	region->bar_size = prop->dram_pci_bar_size;
3336 	region->bar_id = DRAM_BAR_ID;
3337 	region->used = 1;
3338 }
3339 
3340 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3341 {
3342 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3343 	int i, j, k;
3344 
3345 	/* Initialize TPC interrupt */
3346 	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3347 
3348 	/* Initialize general purpose interrupt */
3349 	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3350 						HL_USR_INTERRUPT_UNEXPECTED);
3351 
3352 	/* Initialize common user CQ interrupt */
3353 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3354 				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3355 
3356 	/* Initialize common decoder interrupt */
3357 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3358 				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3359 
3360 	/* The user interrupts structure holds both decoder and user interrupts from various engines.
3361 	 * We first initialize the decoder interrupts and then we add the user interrupts.
3362 	 * The only limitation is that the last decoder interrupt id must be smaller
3363 	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3364 	 */
3365 
3366 	/* Initialize the decoder interrupts and expose only the normal interrupts;
3367 	 * the error (abnormal) interrupts are handled by the driver
3368 	 */
3369 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3370 										i += 2, j++)
3371 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3372 						HL_USR_INTERRUPT_DECODER);
3373 
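	/* The user CQ interrupts are placed right after the decoder entries in the same
	 * user_interrupt array, hence j keeps counting from where the decoder loop stopped.
	 */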
3374 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3375 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3376 }
3377 
3378 static inline int gaudi2_get_non_zero_random_int(void)
3379 {
3380 	int rand = get_random_u32();
3381 
3382 	return rand ? rand : 1;
3383 }
3384 
3385 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3386 {
3387 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3388 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3389 			&prop->skip_special_blocks_cfg;
3390 
3391 	kfree(prop->special_blocks);
3392 	kfree(skip_special_blocks_cfg->block_types);
3393 	kfree(skip_special_blocks_cfg->block_ranges);
3394 }
3395 
3396 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3397 {
3398 	gaudi2_special_blocks_free(hdev);
3399 }
3400 
3401 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3402 		struct hl_special_blocks_cfg *special_blocks_cfg,
3403 		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3404 {
3405 	return false;
3406 }
3407 
3408 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3409 {
3410 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3411 	int i, rc;
3412 
3413 	/* Configure Special blocks */
3414 	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3415 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3416 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3417 			sizeof(*prop->special_blocks), GFP_KERNEL);
3418 	if (!prop->special_blocks)
3419 		return -ENOMEM;
3420 
3421 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3422 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3423 				sizeof(*prop->special_blocks));
3424 
3425 	/* Configure when to skip Special blocks */
3426 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3427 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3428 
3429 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3430 		prop->skip_special_blocks_cfg.block_types =
3431 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3432 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3433 		if (!prop->skip_special_blocks_cfg.block_types) {
3434 			rc = -ENOMEM;
3435 			goto free_special_blocks;
3436 		}
3437 
3438 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3439 				sizeof(gaudi2_iterator_skip_block_types));
3440 
3441 		prop->skip_special_blocks_cfg.block_types_len =
3442 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3443 	}
3444 
3445 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3446 		prop->skip_special_blocks_cfg.block_ranges =
3447 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3448 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3449 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3450 			rc = -ENOMEM;
3451 			goto free_skip_special_blocks_types;
3452 		}
3453 
3454 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3455 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3456 					&gaudi2_iterator_skip_block_ranges[i],
3457 					sizeof(struct range));
3458 
3459 		prop->skip_special_blocks_cfg.block_ranges_len =
3460 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3461 	}
3462 
3463 	return 0;
3464 
3465 free_skip_special_blocks_types:
3466 	kfree(prop->skip_special_blocks_cfg.block_types);
3467 free_special_blocks:
3468 	kfree(prop->special_blocks);
3469 
3470 	return rc;
3471 }
3472 
3473 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3474 {
3475 	return gaudi2_special_blocks_config(hdev);
3476 }
3477 
3478 static int gaudi2_sw_init(struct hl_device *hdev)
3479 {
3480 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3481 	struct gaudi2_device *gaudi2;
3482 	int i, rc;
3483 
3484 	/* Allocate device structure */
3485 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3486 	if (!gaudi2)
3487 		return -ENOMEM;
3488 
3489 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3490 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3491 			continue;
3492 
3493 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3494 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3495 				GAUDI2_EVENT_SIZE);
3496 			rc = -EINVAL;
3497 			goto free_gaudi2_device;
3498 		}
3499 
3500 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3501 	}
3502 
3503 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3504 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3505 
3506 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3507 
3508 	hdev->asic_specific = gaudi2;
3509 
3510 	/* Create DMA pool for small allocations.
3511 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3512 	 * PI/CI registers allocated from this pool have this restriction
3513 	 */
3514 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3515 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3516 	if (!hdev->dma_pool) {
3517 		dev_err(hdev->dev, "failed to create DMA pool\n");
3518 		rc = -ENOMEM;
3519 		goto free_gaudi2_device;
3520 	}
3521 
3522 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3523 	if (rc)
3524 		goto free_dma_pool;
3525 
3526 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3527 	if (!hdev->cpu_accessible_dma_pool) {
3528 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3529 		rc = -ENOMEM;
3530 		goto free_cpu_dma_mem;
3531 	}
3532 
3533 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3534 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3535 	if (rc) {
3536 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3537 		rc = -EFAULT;
3538 		goto free_cpu_accessible_dma_pool;
3539 	}
3540 
3541 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3542 								&gaudi2->virt_msix_db_dma_addr);
3543 	if (!gaudi2->virt_msix_db_cpu_addr) {
3544 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3545 		rc = -ENOMEM;
3546 		goto free_cpu_accessible_dma_pool;
3547 	}
3548 
3549 	spin_lock_init(&gaudi2->hw_queues_lock);
3550 
3551 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3552 							&gaudi2->scratchpad_bus_address,
3553 							GFP_KERNEL | __GFP_ZERO);
3554 	if (!gaudi2->scratchpad_kernel_address) {
3555 		rc = -ENOMEM;
3556 		goto free_virt_msix_db_mem;
3557 	}
3558 
3559 	gaudi2_user_mapped_blocks_init(hdev);
3560 
3561 	/* Initialize user interrupts */
3562 	gaudi2_user_interrupt_setup(hdev);
3563 
3564 	hdev->supports_coresight = true;
3565 	hdev->supports_sync_stream = true;
3566 	hdev->supports_cb_mapping = true;
3567 	hdev->supports_wait_for_multi_cs = false;
3568 
3569 	prop->supports_compute_reset = true;
3570 
3571 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3572 
3573 	rc = gaudi2_special_blocks_iterator_config(hdev);
3574 	if (rc)
3575 		goto free_scratchpad_mem;
3576 
3577 	return 0;
3578 
3579 free_scratchpad_mem:
3580 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3581 				gaudi2->scratchpad_bus_address);
3582 free_virt_msix_db_mem:
3583 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3584 free_cpu_accessible_dma_pool:
3585 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3586 free_cpu_dma_mem:
3587 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3588 					hdev->cpu_accessible_dma_address);
3589 free_dma_pool:
3590 	dma_pool_destroy(hdev->dma_pool);
3591 free_gaudi2_device:
3592 	kfree(gaudi2);
3593 	return rc;
3594 }
3595 
3596 static int gaudi2_sw_fini(struct hl_device *hdev)
3597 {
3598 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3599 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3600 
3601 	gaudi2_special_blocks_iterator_free(hdev);
3602 
3603 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3604 
3605 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3606 
3607 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3608 						hdev->cpu_accessible_dma_address);
3609 
3610 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3611 					gaudi2->scratchpad_bus_address);
3612 
3613 	dma_pool_destroy(hdev->dma_pool);
3614 
3615 	kfree(gaudi2);
3616 
3617 	return 0;
3618 }
3619 
3620 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3621 {
3622 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3623 						QM_GLBL_CFG1_CQF_STOP |
3624 						QM_GLBL_CFG1_CP_STOP);
3625 
3626 	/* stop also the ARC */
3627 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3628 }
3629 
3630 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3631 {
3632 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3633 						QM_GLBL_CFG1_CQF_FLUSH |
3634 						QM_GLBL_CFG1_CP_FLUSH);
3635 }
3636 
3637 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3638 {
3639 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3640 }
3641 
3642 /**
3643  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3644  *
3645  * @hdev: pointer to the habanalabs device structure
3646  * @queue_id: queue whose fence counters should be cleared
3647  * @skip_fence: if true, set the maximum fence value in all fence counters to avoid
3648  *              getting stuck on any fence value. Otherwise set all fence
3649  *              counters to 0 (standard clear of fence counters)
3650  */
3651 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3652 						bool skip_fence)
3653 {
3654 	u32 size, reg_base;
3655 	u32 addr, val;
3656 
3657 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3658 
3659 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3660 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
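	/* The size covers all CP fence counters, i.e. every register from FENCE0_CNT_0
	 * up to (but not including) BARRIER_CFG; the register layout is assumed to be
	 * identical for all QMANs, so the PDMA0 offsets are used to compute it.
	 */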
3661 
3662 	/*
3663 	 * In case we want to make sure that a QM that is stuck on a fence will
3664 	 * be released, we should set the fence counter to a value higher than
3665 	 * the one the QM is waiting for. To comply with a fence counter of
3666 	 * any value, we set the maximum fence value in all counters
3667 	 */
3668 	val = skip_fence ? U32_MAX : 0;
3669 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3670 }
3671 
3672 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3673 {
3674 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3675 
3676 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3677 	gaudi2_flush_qman_common(hdev, reg_base);
3678 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3679 }
3680 
3681 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3682 {
3683 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3684 	int dcore, inst;
3685 
3686 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3687 		goto stop_edma_qmans;
3688 
3689 	/* Stop CPs of PDMA QMANs */
3690 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3691 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3692 
3693 stop_edma_qmans:
3694 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3695 		return;
3696 
3697 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3698 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3699 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3700 			u32 qm_base;
3701 
3702 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3703 				continue;
3704 
3705 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3706 					inst * DCORE_EDMA_OFFSET;
3707 
3708 			/* Stop CPs of EDMA QMANs */
3709 			gaudi2_stop_qman_common(hdev, qm_base);
3710 		}
3711 	}
3712 }
3713 
3714 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3715 {
3716 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3717 	u32 offset, i;
3718 
3719 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3720 
3721 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3722 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3723 			continue;
3724 
3725 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3726 	}
3727 }
3728 
3729 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3730 {
3731 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3732 	u32 reg_base;
3733 	int i;
3734 
3735 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3736 		return;
3737 
3738 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3739 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3740 			continue;
3741 
3742 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3743 		gaudi2_stop_qman_common(hdev, reg_base);
3744 	}
3745 }
3746 
3747 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3748 {
3749 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3750 	u32 reg_base;
3751 	int i;
3752 
3753 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3754 		return;
3755 
3756 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3757 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3758 			continue;
3759 
3760 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3761 		gaudi2_stop_qman_common(hdev, reg_base);
3762 	}
3763 }
3764 
3765 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3766 {
3767 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3768 	u32 reg_base, queue_id;
3769 	int i;
3770 
3771 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3772 		return;
3773 
3774 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3775 
3776 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3777 		if (!(hdev->nic_ports_mask & BIT(i)))
3778 			continue;
3779 
3780 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3781 		gaudi2_stop_qman_common(hdev, reg_base);
3782 	}
3783 }
3784 
3785 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3786 {
3787 	u32 reg_val;
3788 
3789 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3790 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3791 }
3792 
3793 static void gaudi2_dma_stall(struct hl_device *hdev)
3794 {
3795 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3796 	int dcore, inst;
3797 
3798 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3799 		goto stall_edma;
3800 
3801 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3802 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3803 
3804 stall_edma:
3805 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3806 		return;
3807 
3808 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3809 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3810 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3811 			u32 core_base;
3812 
3813 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3814 				continue;
3815 
3816 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3817 					inst * DCORE_EDMA_OFFSET;
3818 
3819 			/* Stall the EDMA cores */
3820 			gaudi2_stall_dma_common(hdev, core_base);
3821 		}
3822 	}
3823 }
3824 
3825 static void gaudi2_mme_stall(struct hl_device *hdev)
3826 {
3827 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3828 	u32 offset, i;
3829 
3830 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3831 
3832 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3833 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3834 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3835 }
3836 
3837 static void gaudi2_tpc_stall(struct hl_device *hdev)
3838 {
3839 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3840 	u32 reg_base;
3841 	int i;
3842 
3843 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3844 		return;
3845 
3846 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3847 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3848 			continue;
3849 
3850 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3851 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3852 	}
3853 }
3854 
3855 static void gaudi2_rotator_stall(struct hl_device *hdev)
3856 {
3857 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3858 	u32 reg_val;
3859 	int i;
3860 
3861 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3862 		return;
3863 
3864 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3865 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3866 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3867 
3868 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3869 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3870 			continue;
3871 
3872 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3873 	}
3874 }
3875 
3876 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3877 {
3878 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3879 }
3880 
3881 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3882 {
3883 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3884 	int dcore, inst;
3885 
3886 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3887 		goto stop_edma_qmans;
3888 
3889 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3890 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3891 
3892 stop_edma_qmans:
3893 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3894 		return;
3895 
3896 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3897 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3898 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3899 			u32 qm_base;
3900 
3901 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3902 				continue;
3903 
3904 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3905 					inst * DCORE_EDMA_OFFSET;
3906 
3907 			/* Disable CPs of EDMA QMANs */
3908 			gaudi2_disable_qman_common(hdev, qm_base);
3909 		}
3910 	}
3911 }
3912 
3913 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3914 {
3915 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3916 	u32 offset, i;
3917 
3918 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3919 
3920 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3921 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3922 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3923 }
3924 
3925 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3926 {
3927 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3928 	u32 reg_base;
3929 	int i;
3930 
3931 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3932 		return;
3933 
3934 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3935 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3936 			continue;
3937 
3938 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3939 		gaudi2_disable_qman_common(hdev, reg_base);
3940 	}
3941 }
3942 
3943 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3944 {
3945 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3946 	u32 reg_base;
3947 	int i;
3948 
3949 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3950 		return;
3951 
3952 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3953 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3954 			continue;
3955 
3956 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3957 		gaudi2_disable_qman_common(hdev, reg_base);
3958 	}
3959 }
3960 
3961 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
3962 {
3963 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3964 	u32 reg_base, queue_id;
3965 	int i;
3966 
3967 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3968 		return;
3969 
3970 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3971 
3972 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3973 		if (!(hdev->nic_ports_mask & BIT(i)))
3974 			continue;
3975 
3976 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3977 		gaudi2_disable_qman_common(hdev, reg_base);
3978 	}
3979 }
3980 
3981 static void gaudi2_enable_timestamp(struct hl_device *hdev)
3982 {
3983 	/* Disable the timestamp counter */
3984 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3985 
3986 	/* Zero the lower/upper parts of the 64-bit counter */
3987 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
3988 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
3989 
3990 	/* Enable the counter */
3991 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
3992 }
3993 
3994 static void gaudi2_disable_timestamp(struct hl_device *hdev)
3995 {
3996 	/* Disable the timestamp counter */
3997 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
3998 }
3999 
4000 static const char *gaudi2_irq_name(u16 irq_number)
4001 {
4002 	switch (irq_number) {
4003 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4004 		return "gaudi2 cpu eq";
4005 	case GAUDI2_IRQ_NUM_COMPLETION:
4006 		return "gaudi2 completion";
4007 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4008 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4009 	case GAUDI2_IRQ_NUM_TPC_ASSERT:
4010 		return "gaudi2 tpc assert";
	case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
		return "gaudi2 unexpected error";
4013 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4014 		return "gaudi2 user completion";
4015 	default:
4016 		return "invalid";
4017 	}
4018 }
4019 
4020 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4021 {
4022 	int i, irq, relative_idx;
4023 	struct hl_dec *dec;
4024 
4025 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4026 		irq = pci_irq_vector(hdev->pdev, i);
4027 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4028 
4029 		dec = hdev->dec + relative_idx / 2;
4030 
4031 		/* We pass different structures depending on the irq handler. For the abnormal
4032 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4033 		 * user_interrupt entry
4034 		 */
4035 		free_irq(irq, ((relative_idx % 2) ?
4036 				(void *) dec :
4037 				(void *) &hdev->user_interrupt[dec->core_id]));
4038 	}
4039 }
4040 
4041 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4042 {
4043 	int rc, i, irq_init_cnt, irq, relative_idx;
4044 	struct hl_dec *dec;
4045 
4046 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4047 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4048 			i++, irq_init_cnt++) {
4049 
4050 		irq = pci_irq_vector(hdev->pdev, i);
4051 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4052 
4053 		/* We pass different structures depending on the irq handler. For the abnormal
4054 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4055 		 * user_interrupt entry
4056 		 *
4057 		 * TODO: change the dec abnrm to threaded irq
4058 		 */
4059 
4060 		dec = hdev->dec + relative_idx / 2;
4061 		if (relative_idx % 2) {
4062 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4063 						gaudi2_irq_name(i), (void *) dec);
4064 		} else {
4065 			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4066 					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4067 					gaudi2_irq_name(i),
4068 					(void *) &hdev->user_interrupt[dec->core_id]);
4069 		}
4070 
4071 		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4073 			goto free_dec_irqs;
4074 		}
4075 	}
4076 
4077 	return 0;
4078 
4079 free_dec_irqs:
4080 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4081 	return rc;
4082 }
4083 
4084 static int gaudi2_enable_msix(struct hl_device *hdev)
4085 {
4086 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4087 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4088 	int rc, irq, i, j, user_irq_init_cnt;
4089 	struct hl_cq *cq;
4090 
4091 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4092 		return 0;
4093 
4094 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4095 					PCI_IRQ_MSIX);
4096 	if (rc < 0) {
4097 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4098 			GAUDI2_MSIX_ENTRIES, rc);
4099 		return rc;
4100 	}
4101 
4102 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4103 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4104 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4105 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4107 		goto free_irq_vectors;
4108 	}
4109 
4110 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4111 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4112 			&hdev->event_queue);
4113 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4115 		goto free_completion_irq;
4116 	}
4117 
4118 	rc = gaudi2_dec_enable_msix(hdev);
4119 	if (rc) {
		dev_err(hdev->dev, "Failed to enable decoder IRQ\n");
4121 		goto free_event_irq;
4122 	}
4123 
4124 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4125 	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4126 			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4127 			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4128 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4130 		goto free_dec_irq;
4131 	}
4132 
4133 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4134 	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4135 			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4136 					&hdev->unexpected_error_interrupt);
4137 	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4139 		goto free_tpc_irq;
4140 	}
4141 
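	/* The common user interrupts follow the decoder interrupts both in the MSI-X
	 * vector table and in the user_interrupt[] array, hence j starts after the
	 * decoder entries.
	 */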
4142 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4143 			user_irq_init_cnt < prop->user_interrupt_count;
4144 			i++, j++, user_irq_init_cnt++) {
4145 
4146 		irq = pci_irq_vector(hdev->pdev, i);
4147 		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4148 						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4149 						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4150 
4151 		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
4153 			goto free_user_irq;
4154 		}
4155 	}
4156 
4157 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4158 
4159 	return 0;
4160 
4161 free_user_irq:
4162 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4163 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4164 
4165 		irq = pci_irq_vector(hdev->pdev, i);
4166 		free_irq(irq, &hdev->user_interrupt[j]);
4167 	}
4168 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4169 	free_irq(irq, &hdev->unexpected_error_interrupt);
4170 free_tpc_irq:
4171 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4172 	free_irq(irq, &hdev->tpc_interrupt);
4173 free_dec_irq:
4174 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4175 free_event_irq:
4176 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);
4178 
4179 free_completion_irq:
4180 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4181 	free_irq(irq, cq);
4182 
4183 free_irq_vectors:
4184 	pci_free_irq_vectors(hdev->pdev);
4185 
4186 	return rc;
4187 }
4188 
4189 static void gaudi2_sync_irqs(struct hl_device *hdev)
4190 {
4191 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4192 	int i, j;
4193 	int irq;
4194 
4195 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4196 		return;
4197 
4198 	/* Wait for all pending IRQs to be finished */
4199 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4200 
4201 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4202 		irq = pci_irq_vector(hdev->pdev, i);
4203 		synchronize_irq(irq);
4204 	}
4205 
4206 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4207 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4208 
4209 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4210 										i++, j++) {
4211 		irq = pci_irq_vector(hdev->pdev, i);
4212 		synchronize_irq(irq);
4213 	}
4214 
4215 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4216 }
4217 
4218 static void gaudi2_disable_msix(struct hl_device *hdev)
4219 {
4220 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4221 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4222 	struct hl_cq *cq;
4223 	int irq, i, j, k;
4224 
4225 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4226 		return;
4227 
4228 	gaudi2_sync_irqs(hdev);
4229 
4230 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4231 	free_irq(irq, &hdev->event_queue);
4232 
4233 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4234 
4235 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4236 	free_irq(irq, &hdev->tpc_interrupt);
4237 
4238 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4239 	free_irq(irq, &hdev->unexpected_error_interrupt);
4240 
4241 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4242 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4243 
4244 		irq = pci_irq_vector(hdev->pdev, i);
4245 		free_irq(irq, &hdev->user_interrupt[j]);
4246 	}
4247 
4248 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4249 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4250 	free_irq(irq, cq);
4251 
4252 	pci_free_irq_vectors(hdev->pdev);
4253 
4254 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4255 }
4256 
4257 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4258 {
4259 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4260 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4261 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4262 	int rc;
4263 
4264 	if (hdev->pldm)
4265 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4266 	else
4267 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4268 
4269 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4270 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4271 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4272 			continue;
4273 
4274 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4275 
4276 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4277 
4278 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4279 
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
4283 		rc = hl_poll_timeout(
4284 				hdev,
4285 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4286 				graceful,
4287 				(graceful & graceful_pend_mask),
4288 				100,
4289 				timeout_usec);
4290 		if (rc)
4291 			dev_err(hdev->dev,
4292 				"Failed to stop traffic from DCORE%d Decoder %d\n",
4293 				dcore_id, dec_id);
4294 	}
4295 }
4296 
4297 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4298 {
4299 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4300 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4301 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4302 	int rc;
4303 
4304 	if (hdev->pldm)
4305 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4306 	else
4307 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4308 
4309 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4310 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4311 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4312 			continue;
4313 
4314 		offset = dec_id * PCIE_VDEC_OFFSET;
4315 
4316 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4317 
4318 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4319 
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
4323 		rc = hl_poll_timeout(
4324 				hdev,
4325 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4326 				graceful,
4327 				(graceful & graceful_pend_mask),
4328 				100,
4329 				timeout_usec);
4330 		if (rc)
4331 			dev_err(hdev->dev,
4332 				"Failed to stop traffic from PCIe Decoder %d\n",
4333 				dec_id);
4334 	}
4335 }
4336 
4337 static void gaudi2_stop_dec(struct hl_device *hdev)
4338 {
4339 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4340 	int dcore_id;
4341 
4342 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4343 		return;
4344 
4345 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4346 		gaudi2_stop_dcore_dec(hdev, dcore_id);
4347 
4348 	gaudi2_stop_pcie_dec(hdev);
4349 }
4350 
4351 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4352 {
4353 	u32 reg_base, reg_val;
4354 
4355 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
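	/* Set either the run or the halt request bit in the ARC run/halt request register */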
4356 	if (run_mode == HL_ENGINE_CORE_RUN)
4357 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4358 	else
4359 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4360 
4361 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4362 }
4363 
4364 static void gaudi2_halt_arcs(struct hl_device *hdev)
4365 {
4366 	u16 arc_id;
4367 
4368 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4369 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4370 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4371 	}
4372 }
4373 
4374 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4375 {
4376 	int rc;
4377 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
4378 
4379 	if (hdev->pldm)
4380 		timeout_usec *= 100;
4381 
4382 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4383 	if (run_mode == HL_ENGINE_CORE_RUN)
4384 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4385 	else
4386 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4387 
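	/* Poll the ACK register until the ARC acknowledges the requested run/halt mode */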
4388 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4389 				val, ((val & ack_mask) == ack_mask),
4390 				1000, timeout_usec);
4391 
4392 	if (!rc) {
		/* Clear the run/halt request */
4394 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4395 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4396 	}
4397 
4398 	return rc;
4399 }
4400 
4401 static void gaudi2_reset_arcs(struct hl_device *hdev)
4402 {
4403 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4404 	u16 arc_id;
4405 
4406 	if (!gaudi2)
4407 		return;
4408 
4409 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4410 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4411 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4412 }
4413 
4414 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4415 {
4416 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4417 	u32 queue_id;
4418 	int i;
4419 
4420 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4421 		return;
4422 
4423 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4424 
4425 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4426 		if (!(hdev->nic_ports_mask & BIT(i)))
4427 			continue;
4428 
4429 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4430 	}
4431 }
4432 
4433 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4434 					u32 num_cores, u32 core_command)
4435 {
4436 	int i, rc;
4437 
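	/* First issue the run/halt request to all requested cores, then verify that
	 * each of them has acknowledged the new mode.
	 */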
4438 	for (i = 0 ; i < num_cores ; i++) {
4439 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4440 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4441 	}
4442 
4443 	for (i = 0 ; i < num_cores ; i++) {
4444 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4445 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4446 
4447 			if (rc) {
4448 				dev_err(hdev->dev, "failed to %s arc: %d\n",
4449 					(core_command == HL_ENGINE_CORE_HALT) ?
4450 					"HALT" : "RUN", core_ids[i]);
4451 				return -1;
4452 			}
4453 		}
4454 	}
4455 
4456 	return 0;
4457 }
4458 
4459 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4460 {
4461 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4462 	u32 reg_base, reg_addr, reg_val, tpc_id;
4463 
4464 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4465 		return 0;
4466 
4467 	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4468 	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4469 		return 0;
4470 
4471 	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4472 	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4473 	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4474 			!!(engine_command == HL_ENGINE_STALL));
4475 	WREG32(reg_addr, reg_val);
4476 
4477 	if (engine_command == HL_ENGINE_RESUME) {
4478 		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4479 		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4480 		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4481 	}
4482 
4483 	return 0;
4484 }
4485 
4486 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4487 {
4488 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4489 	u32 reg_base, reg_addr, reg_val, mme_id;
4490 
4491 	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4492 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4493 		return 0;
4494 
4495 	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4496 	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4497 	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4498 			!!(engine_command == HL_ENGINE_STALL));
4499 	WREG32(reg_addr, reg_val);
4500 
4501 	return 0;
4502 }
4503 
4504 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4505 {
4506 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4507 	u32 reg_base, reg_addr, reg_val, edma_id;
4508 
4509 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4510 		return 0;
4511 
4512 	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4513 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4514 		return 0;
4515 
4516 	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4517 	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4518 	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4519 			!!(engine_command == HL_ENGINE_STALL));
4520 	WREG32(reg_addr, reg_val);
4521 
4522 	if (engine_command == HL_ENGINE_STALL) {
4523 		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4524 				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4525 		WREG32(reg_addr, reg_val);
4526 	}
4527 
4528 	return 0;
4529 }
4530 
4531 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4532 		u32 *engine_ids, u32 num_engines, u32 engine_command)
4533 {
4534 	int i, rc;
4535 
4536 	for (i = 0 ; i < num_engines ; ++i) {
4537 		switch (engine_ids[i]) {
4538 		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4539 		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4540 		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4541 		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4542 			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4543 			if (rc)
4544 				return rc;
4545 
4546 			break;
4547 		case GAUDI2_DCORE0_ENGINE_ID_MME:
4548 		case GAUDI2_DCORE1_ENGINE_ID_MME:
4549 		case GAUDI2_DCORE2_ENGINE_ID_MME:
4550 		case GAUDI2_DCORE3_ENGINE_ID_MME:
4551 			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4552 			if (rc)
4553 				return rc;
4554 
4555 			break;
4556 		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4557 		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4558 		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4559 		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4560 			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4561 			if (rc)
4562 				return rc;
4563 
4564 			break;
4565 		default:
4566 			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4567 			return -EINVAL;
4568 		}
4569 	}
4570 
4571 	return 0;
4572 }
4573 
4574 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4575 					u32 num_engines, u32 engine_command)
4576 {
4577 	switch (engine_command) {
4578 	case HL_ENGINE_CORE_HALT:
4579 	case HL_ENGINE_CORE_RUN:
4580 		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4581 
4582 	case HL_ENGINE_STALL:
4583 	case HL_ENGINE_RESUME:
4584 		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4585 
4586 	default:
4587 		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4588 		return -EINVAL;
4589 	}
4590 }
4591 
4592 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4593 {
4594 	u32 wait_timeout_ms;
4595 
4596 	if (hdev->pldm)
4597 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4598 	else
4599 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4600 
4601 	if (fw_reset)
4602 		goto skip_engines;
4603 
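	/* Stop the QMANs first so no new work is dispatched, then stall the engines
	 * themselves and only afterwards disable the QMANs.
	 */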
4604 	gaudi2_stop_dma_qmans(hdev);
4605 	gaudi2_stop_mme_qmans(hdev);
4606 	gaudi2_stop_tpc_qmans(hdev);
4607 	gaudi2_stop_rot_qmans(hdev);
4608 	gaudi2_stop_nic_qmans(hdev);
4609 	msleep(wait_timeout_ms);
4610 
4611 	gaudi2_halt_arcs(hdev);
4612 	gaudi2_dma_stall(hdev);
4613 	gaudi2_mme_stall(hdev);
4614 	gaudi2_tpc_stall(hdev);
4615 	gaudi2_rotator_stall(hdev);
4616 
4617 	msleep(wait_timeout_ms);
4618 
4619 	gaudi2_stop_dec(hdev);
4620 
4621 	/*
4622 	 * in case of soft reset do a manual flush for QMANs (currently called
4623 	 * only for NIC QMANs
4624 	 */
4625 	if (!hard_reset)
4626 		gaudi2_nic_qmans_manual_flush(hdev);
4627 
4628 	gaudi2_disable_dma_qmans(hdev);
4629 	gaudi2_disable_mme_qmans(hdev);
4630 	gaudi2_disable_tpc_qmans(hdev);
4631 	gaudi2_disable_rot_qmans(hdev);
4632 	gaudi2_disable_nic_qmans(hdev);
4633 	gaudi2_disable_timestamp(hdev);
4634 
4635 skip_engines:
4636 	if (hard_reset) {
4637 		gaudi2_disable_msix(hdev);
4638 		return;
4639 	}
4640 
4641 	gaudi2_sync_irqs(hdev);
4642 }
4643 
4644 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4645 {
4646 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4647 
4648 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4649 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4650 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4651 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4652 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4653 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4654 }
4655 
4656 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4657 {
4658 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4659 	struct dynamic_fw_load_mgr *dynamic_loader;
4660 	struct cpu_dyn_regs *dyn_regs;
4661 
4662 	/* fill common fields */
4663 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4664 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4665 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4666 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4667 	fw_loader->skip_bmc = false;
4668 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4669 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4670 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4671 
	/* Here we set initial values for a few specific dynamic regs (before
	 * reading the first descriptor from FW, those values have to be
	 * hard-coded). In later stages of the protocol those values will be
	 * updated automatically by reading the FW descriptor, so the data there
	 * will always be up-to-date.
	 */
4678 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4679 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4680 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4681 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4682 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4683 }
4684 
4685 static int gaudi2_init_cpu(struct hl_device *hdev)
4686 {
4687 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4688 	int rc;
4689 
4690 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4691 		return 0;
4692 
4693 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4694 		return 0;
4695 
4696 	rc = hl_fw_init_cpu(hdev);
4697 	if (rc)
4698 		return rc;
4699 
4700 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4701 
4702 	return 0;
4703 }
4704 
4705 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4706 {
4707 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4708 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4709 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4710 	struct cpu_dyn_regs *dyn_regs;
4711 	struct hl_eq *eq;
4712 	u32 status;
4713 	int err;
4714 
4715 	if (!hdev->cpu_queues_enable)
4716 		return 0;
4717 
4718 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4719 		return 0;
4720 
4721 	eq = &hdev->event_queue;
4722 
4723 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4724 
4725 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4726 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4727 
4728 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4729 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4730 
4731 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4732 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4733 
4734 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4735 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4736 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4737 
4738 	/* Used for EQ CI */
4739 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4740 
4741 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4742 
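	/* Signal that the host-side queues are initialized and ready for the device CPU */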
4743 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4744 
	/* Let the ARC know we are ready as it is now handling those queues */
4746 
4747 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4748 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4749 
4750 	err = hl_poll_timeout(
4751 		hdev,
4752 		mmCPU_IF_QUEUE_INIT,
4753 		status,
4754 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4755 		1000,
4756 		cpu_timeout);
4757 
4758 	if (err) {
4759 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4760 		return -EIO;
4761 	}
4762 
4763 	/* update FW application security bits */
4764 	if (prop->fw_cpu_boot_dev_sts0_valid)
4765 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4766 
4767 	if (prop->fw_cpu_boot_dev_sts1_valid)
4768 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4769 
4770 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4771 	return 0;
4772 }
4773 
4774 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4775 				u32 queue_id_base)
4776 {
4777 	struct hl_hw_queue *q;
4778 	u32 pq_id, pq_offset;
4779 
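	/* Program the base address and size of each PQ and reset its PI/CI */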
4780 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4781 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4782 		pq_offset = pq_id * 4;
4783 
4784 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4785 				lower_32_bits(q->bus_address));
4786 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4787 				upper_32_bits(q->bus_address));
4788 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4789 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4790 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4791 	}
4792 }
4793 
4794 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4795 {
4796 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4797 
4798 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4799 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4800 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4801 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4802 
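	/* Set the monitor and sync object message base addresses for each CP */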
4803 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4804 		cp_offset = cp_id * 4;
4805 
4806 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
4810 	}
4811 
4812 	/* allow QMANs to accept work from ARC CQF */
4813 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4814 }
4815 
4816 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4817 				u32 queue_id_base)
4818 {
4819 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4820 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4821 
4822 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4823 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4824 
4825 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4826 		pq_offset = pq_id * 4;
4827 
		/* Point the QMAN HBW completion base address at the scratchpad, as it is not needed */
4829 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4830 				lower_32_bits(gaudi2->scratchpad_bus_address));
4831 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4832 				upper_32_bits(gaudi2->scratchpad_bus_address));
4833 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4834 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4835 
4836 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4837 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4838 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4839 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4840 	}
4841 
4842 	/* Enable QMAN H/W completion */
4843 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4844 }
4845 
4846 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4847 {
4848 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4849 	u32 sp_reg_addr;
4850 
4851 	switch (queue_id_base) {
4852 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4853 		fallthrough;
4854 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4855 		fallthrough;
4856 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4857 		fallthrough;
4858 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4859 		fallthrough;
4860 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4861 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4862 		break;
4863 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4864 		fallthrough;
4865 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4866 		fallthrough;
4867 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4868 		fallthrough;
4869 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4870 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4871 		break;
4872 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4873 		fallthrough;
4874 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4875 		fallthrough;
4876 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4877 		fallthrough;
4878 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4879 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4880 		break;
4881 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4882 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4883 		break;
4884 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4885 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4886 		break;
4887 	default:
4888 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4889 		return 0;
4890 	}
4891 
4892 	return sp_reg_addr;
4893 }
4894 
4895 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4896 					u32 queue_id_base)
4897 {
4898 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4899 	int map_table_entry;
4900 
4901 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4902 
4903 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4904 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4905 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4906 
4907 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4908 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4909 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4910 
4911 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4912 
4913 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4914 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4915 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4916 
	/* Enable the QMAN channel.
	 * The PDMA QMAN configuration is different, as we do not allow the user
	 * to access some of the CPs.
	 * PDMA0: CP2/3 are reserved for ARC usage.
	 * PDMA1: CP1/2/3 are reserved for ARC usage.
	 */
4923 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4924 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4925 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4926 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4927 	else
4928 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4929 }
4930 
4931 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4932 		u32 queue_id_base)
4933 {
4934 	u32 pq_id;
4935 
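	/* All PQs of this QMAN report completions to the reserved CS-completion CQ */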
4936 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4937 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4938 
4939 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4940 	gaudi2_init_qman_cp(hdev, reg_base);
4941 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4942 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4943 }
4944 
4945 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
4946 				u32 dma_core_id, bool is_secure)
4947 {
4948 	u32 prot, irq_handler_offset;
4949 	struct cpu_dyn_regs *dyn_regs;
4950 	int map_table_entry;
4951 
4952 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
4953 	if (is_secure)
4954 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
4955 
4956 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
4957 
4958 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4959 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
4960 
4961 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
4962 			lower_32_bits(CFG_BASE + irq_handler_offset));
4963 
4964 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
4965 			upper_32_bits(CFG_BASE + irq_handler_offset));
4966 
4967 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
4968 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
4969 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4970 
4971 	/* Enable the DMA channel */
4972 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
4973 }
4974 
4975 static void gaudi2_init_kdma(struct hl_device *hdev)
4976 {
4977 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4978 	u32 reg_base;
4979 
4980 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
4981 		return;
4982 
4983 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
4984 
4985 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
4986 
4987 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
4988 }
4989 
4990 static void gaudi2_init_pdma(struct hl_device *hdev)
4991 {
4992 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4993 	u32 reg_base;
4994 
4995 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
4996 		return;
4997 
4998 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
4999 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5000 
5001 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5002 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5003 
5004 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5005 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5006 
5007 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5008 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5009 
5010 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5011 }
5012 
5013 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5014 {
5015 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
5016 
5017 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5018 	base_edma_qman_id = edma_stream_base[seq];
5019 
5020 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5021 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5022 
5023 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5024 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5025 }
5026 
5027 static void gaudi2_init_edma(struct hl_device *hdev)
5028 {
5029 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5030 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5031 	int dcore, inst;
5032 
5033 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5034 		return;
5035 
5036 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5037 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5038 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5039 
5040 			if (!(prop->edma_enabled_mask & BIT(seq)))
5041 				continue;
5042 
5043 			gaudi2_init_edma_instance(hdev, seq);
5044 
5045 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5046 		}
5047 	}
5048 }
5049 
5050 /*
5051  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5052  * @hdev: pointer to habanalabs device structure.
5053  * @sob_id: sync object ID.
5054  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5055  * @interrupt_id: interrupt ID.
5056  *
5057  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5058  * write directly to the HBW host memory of the virtual MSI-X doorbell.
5059  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5060  *
5061  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5062  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5063  * completion, by decrementing the sync object value and re-arming the monitor.
5064  */
5065 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5066 							u32 first_mon_id, u32 interrupt_id)
5067 {
5068 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5069 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5070 	u64 addr;
5071 	u8 mask;
5072 
5073 	/* Reset the SOB value */
5074 	sob_offset = sob_id * sizeof(u32);
5075 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5076 
5077 	/* Configure 3 monitors:
5078 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5079 	 * 2. Decrement SOB value by 1.
5080 	 * 3. Re-arm the master monitor.
5081 	 */
5082 
5083 	first_mon_offset = first_mon_id * sizeof(u32);
5084 
5085 	/* 2nd monitor: Decrement SOB value by 1 */
5086 	mon_offset = first_mon_offset + sizeof(u32);
5087 
5088 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5089 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5090 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5091 
5092 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5093 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5094 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5095 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5096 
5097 	/* 3rd monitor: Re-arm the master monitor */
5098 	mon_offset = first_mon_offset + 2 * sizeof(u32);
5099 
5100 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5101 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5102 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5103 
5104 	sob_group = sob_id / 8;
5105 	mask = ~BIT(sob_id & 0x7);
5106 	mode = 0; /* comparison mode is "greater than or equal to" */
5107 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5108 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5109 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5110 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5111 
5112 	payload = arm;
5113 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5114 
5115 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5116 	mon_offset = first_mon_offset;
5117 
5118 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5119 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5120 
5121 	addr = gaudi2->virt_msix_db_dma_addr;
5122 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5123 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5124 
5125 	payload = interrupt_id;
5126 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5127 
5128 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5129 }
5130 
5131 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5132 {
5133 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5134 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5135 
5136 	/* Decoder normal/abnormal interrupts */
5137 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5138 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5139 			continue;
5140 
5141 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5142 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5143 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5144 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5145 
5146 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5147 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5148 		interrupt_id += 1;
5149 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5150 	}
5151 }
5152 
5153 static void gaudi2_init_sm(struct hl_device *hdev)
5154 {
5155 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5156 	u64 cq_address;
5157 	u32 reg_val;
5158 	int i;
5159 
5160 	/* Enable HBW/LBW CQ for completion monitors */
5161 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5162 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5163 
5164 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5165 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5166 
5167 	/* Enable only HBW CQ for KDMA completion monitor */
5168 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5169 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5170 
5171 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5172 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5173 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5174 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5175 
5176 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
		cq_address = hdev->completion_queue[i].bus_address;
5179 
5180 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5181 							lower_32_bits(cq_address));
5182 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5183 							upper_32_bits(cq_address));
5184 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5185 							ilog2(HL_CQ_SIZE_IN_BYTES));
5186 	}
5187 
	/* Configure kernel ASID and MMU BP */
5189 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5190 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5191 
5192 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5193 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
5194 }
5195 
5196 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5197 {
5198 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5199 	u32 reg_val;
5200 	int i;
5201 
5202 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5203 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5204 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5205 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5206 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5207 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5208 
5209 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5210 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5211 
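	/* Program a random seed for each of the LFSR seed-select indices */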
5212 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5213 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5214 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5215 	}
5216 }
5217 
5218 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5219 							bool config_qman_only)
5220 {
5221 	u32 queue_id_base, reg_base;
5222 
5223 	switch (dcore_id) {
5224 	case 0:
5225 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5226 		break;
5227 	case 1:
5228 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5229 		break;
5230 	case 2:
5231 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5232 		break;
5233 	case 3:
5234 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5235 		break;
5236 	default:
5237 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5238 		return;
5239 	}
5240 
5241 	if (!config_qman_only) {
5242 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5243 		gaudi2_init_mme_acc(hdev, reg_base);
5244 	}
5245 
5246 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5247 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
5248 }
5249 
5250 static void gaudi2_init_mme(struct hl_device *hdev)
5251 {
5252 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5253 	int i;
5254 
5255 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5256 		return;
5257 
5258 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5259 		gaudi2_init_dcore_mme(hdev, i, false);
5260 
5261 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5262 	}
5263 }
5264 
5265 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5266 {
5267 	/* Mask arithmetic and QM interrupts in TPC */
5268 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5269 
5270 	/* Set 16 cache lines */
5271 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5272 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5273 }
5274 
5275 struct gaudi2_tpc_init_cfg_data {
5276 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5277 };
5278 
5279 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5280 					u32 offset, struct iterate_module_ctx *ctx)
5281 {
5282 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5283 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5284 	u32 queue_id_base;
5285 	u8 seq;
5286 
5287 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5288 
5289 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
		/* the additional TPC in DCORE0 gets the last sequence number */
5291 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5292 	else
5293 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5294 
5295 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5296 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5297 
5298 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5299 }
5300 
5301 static void gaudi2_init_tpc(struct hl_device *hdev)
5302 {
5303 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5304 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5305 	struct iterate_module_ctx tpc_iter;
5306 
5307 	if (!hdev->asic_prop.tpc_enabled_mask)
5308 		return;
5309 
5310 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5311 		return;
5312 
5313 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5314 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5315 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5316 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5317 	tpc_iter.fn = &gaudi2_init_tpc_config;
5318 	tpc_iter.data = &init_cfg_data;
5319 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5320 }
5321 
5322 static void gaudi2_init_rotator(struct hl_device *hdev)
5323 {
5324 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5325 	u32 i, reg_base, queue_id;
5326 
5327 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5328 
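	/* Initialize the QMAN of each rotator and mark its capability bit */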
5329 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5330 		reg_base = gaudi2_qm_blocks_bases[queue_id];
5331 		gaudi2_init_qman(hdev, reg_base, queue_id);
5332 
5333 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5334 	}
5335 }
5336 
5337 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5338 {
5339 	u32 sob_id;
5340 
5341 	/* VCMD normal interrupt */
5342 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5343 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5344 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5345 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5346 
5347 	/* VCMD abnormal interrupt */
5348 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5349 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5350 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5351 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5352 }
5353 
5354 static void gaudi2_init_dec(struct hl_device *hdev)
5355 {
5356 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5357 	u32 dcore_id, dec_id, dec_bit;
5358 	u64 base_addr;
5359 
5360 	if (!hdev->asic_prop.decoder_enabled_mask)
5361 		return;
5362 
5363 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5364 		return;
5365 
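	/* Configure the bridge control block of each enabled decoder, first in the
	 * dcores and then on the PCIe side.
	 */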
5366 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5367 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5368 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5369 
5370 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5371 				continue;
5372 
			base_addr = mmDCORE0_DEC0_CMD_BASE +
5374 					BRDG_CTRL_BLOCK_OFFSET +
5375 					dcore_id * DCORE_OFFSET +
5376 					dec_id * DCORE_VDEC_OFFSET;
5377 
5378 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5379 
5380 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5381 		}
5382 
5383 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5384 		dec_bit = PCIE_DEC_SHIFT + dec_id;
5385 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5386 			continue;
5387 
5388 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5389 				dec_id * DCORE_VDEC_OFFSET;
5390 
5391 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5392 
5393 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5394 	}
5395 }
5396 
5397 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5398 					u32 stlb_base, u32 asid, u64 phys_addr)
5399 {
5400 	u32 status, timeout_usec;
5401 	int rc;
5402 
5403 	if (hdev->pldm || !hdev->pdev)
5404 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5405 	else
5406 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5407 
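	/* Program the hop0 physical address of the ASID, trigger the update by setting
	 * the busy bit and wait for the MMU to complete it.
	 */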
5408 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5409 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5410 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5411 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5412 
5413 	rc = hl_poll_timeout(
5414 		hdev,
5415 		stlb_base + STLB_BUSY_OFFSET,
5416 		status,
5417 		!(status & 0x80000000),
5418 		1000,
5419 		timeout_usec);
5420 
5421 	if (rc) {
5422 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5423 		return rc;
5424 	}
5425 
5426 	return 0;
5427 }
5428 
5429 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5430 					u32 start_offset, u32 inv_start_val,
5431 					u32 flags)
5432 {
5433 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
5434 	if (flags & MMU_OP_CLEAR_MEMCACHE)
5435 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5436 
5437 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5438 		return;
5439 
5440 	WREG32(stlb_base + start_offset, inv_start_val);
5441 }
5442 
5443 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5444 						struct gaudi2_cache_invld_params *inv_params)
5445 {
5446 	u32 status, timeout_usec, start_offset;
5447 	int rc;
5448 
5449 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5450 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5451 
5452 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
5453 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5454 		rc = hl_poll_timeout(
5455 			hdev,
5456 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5457 			status,
5458 			status & 0x1,
5459 			1000,
5460 			timeout_usec);
5461 
5462 		if (rc)
5463 			return rc;
5464 
5465 		/* Need to manually reset the status to 0 */
5466 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5467 	}
5468 
5469 	/* Lower cache does not work with cache lines, hence we can skip its
5470 	 * invalidation upon map and invalidate only upon unmap
5471 	 */
5472 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5473 		return 0;
5474 
5475 	start_offset = inv_params->range_invalidation ?
5476 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5477 
5478 	rc = hl_poll_timeout(
5479 		hdev,
5480 		stlb_base + start_offset,
5481 		status,
5482 		!(status & 0x1),
5483 		1000,
5484 		timeout_usec);
5485 
5486 	return rc;
5487 }
5488 
5489 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5490 {
5491 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5492 	u32 hw_cap;
5493 
5494 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5495 
5496 	if (gaudi2->hw_cap_initialized & hw_cap)
5497 		return true;
5498 
5499 	return false;
5500 }
5501 
/* This function shall be called only for HMMUs whose capability bit is set */
5503 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5504 {
5505 	u32 offset;
5506 
	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5508 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5509 }
5510 
5511 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5512 						struct gaudi2_cache_invld_params *inv_params)
5513 {
5514 	u32 start_offset;
5515 
5516 	if (inv_params->range_invalidation) {
		/* Set the address range.
		 * Note: by design, the start address we set in the register is not
		 * included in the invalidation range, so we set an address lower than
		 * the first one we actually want to be invalidated.
		 */
5523 		u64 start = inv_params->start_va - 1;
5524 
5525 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5526 
5527 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5528 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5529 
5530 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5531 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5532 
5533 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5534 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5535 
5536 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5537 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5538 	} else {
5539 		start_offset = STLB_INV_ALL_START_OFFSET;
5540 	}
5541 
5542 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5543 						inv_params->inv_start_val, inv_params->flags);
5544 }
5545 
5546 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5547 						int dcore_id, int hmmu_id,
5548 						struct gaudi2_cache_invld_params *inv_params)
5549 {
5550 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5551 
5552 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5553 }
5554 
5555 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5556 						int dcore_id, int hmmu_id,
5557 						struct gaudi2_cache_invld_params *inv_params)
5558 {
5559 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5560 
5561 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5562 }
5563 
5564 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5565 						struct gaudi2_cache_invld_params *inv_params)
5566 {
5567 	int dcore_id, hmmu_id;
5568 
5569 	/* first send all invalidation commands */
5570 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5571 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5572 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5573 				continue;
5574 
5575 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5576 		}
5577 	}
5578 
5579 	/* next, poll all invalidations status */
5580 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5581 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5582 			int rc;
5583 
5584 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5585 				continue;
5586 
5587 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5588 										inv_params);
5589 			if (rc)
5590 				return rc;
5591 		}
5592 	}
5593 
5594 	return 0;
5595 }
5596 
5597 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5598 {
5599 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5600 	struct gaudi2_cache_invld_params invld_params;
5601 	int rc = 0;
5602 
5603 	if (hdev->reset_info.hard_reset_pending)
5604 		return rc;
5605 
5606 	invld_params.range_invalidation = false;
5607 	invld_params.inv_start_val = 1;
5608 
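	/* USERPTR mappings are translated by the PMMU, while physical-pack (DRAM)
	 * mappings are cached by the HMMUs, hence the different invalidation paths
	 */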
5609 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5610 		invld_params.flags = flags;
5611 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5612 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5613 										&invld_params);
5614 	} else if (flags & MMU_OP_PHYS_PACK) {
5615 		invld_params.flags = 0;
5616 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5617 	}
5618 
5619 	return rc;
5620 }
5621 
5622 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5623 				u32 flags, u32 asid, u64 va, u64 size)
5624 {
5625 	struct gaudi2_cache_invld_params invld_params = {0};
5626 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5627 	u64 start_va, end_va;
5628 	u32 inv_start_val;
5629 	int rc = 0;
5630 
5631 	if (hdev->reset_info.hard_reset_pending)
5632 		return 0;
5633 
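	/* Enable range-based, ASID-filtered invalidation and encode the target ASID */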
5634 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5635 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5636 			asid << MMU_RANGE_INV_ASID_SHIFT);
5637 	start_va = va;
5638 	end_va = start_va + size;
5639 
5640 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
		/* As range invalidation does not support a zero start address, we
		 * fall back to a full invalidation in this case
		 */
5644 		if (start_va) {
5645 			invld_params.range_invalidation = true;
5646 			invld_params.start_va = start_va;
5647 			invld_params.end_va = end_va;
5648 			invld_params.inv_start_val = inv_start_val;
5649 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5650 		} else {
5651 			invld_params.range_invalidation = false;
5652 			invld_params.inv_start_val = 1;
5653 			invld_params.flags = flags;
5654 		}
5655 
5656 
5657 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5658 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5659 										&invld_params);
5660 		if (rc)
5661 			return rc;
5662 
5663 	} else if (flags & MMU_OP_PHYS_PACK) {
5664 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5665 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5666 		invld_params.inv_start_val = inv_start_val;
5667 		invld_params.flags = flags;
5668 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5669 	}
5670 
5671 	return rc;
5672 }
5673 
5674 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5675 {
5676 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5677 	u64 hop0_addr;
5678 	u32 asid, max_asid = prop->max_asid;
5679 	int rc;
5680 
5681 	/* it takes too much time to init all of the ASIDs on palladium */
5682 	if (hdev->pldm)
5683 		max_asid = min((u32) 8, max_asid);
5684 
5685 	for (asid = 0 ; asid < max_asid ; asid++) {
5686 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5687 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5688 		if (rc) {
5689 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5690 			return rc;
5691 		}
5692 	}
5693 
5694 	return 0;
5695 }
5696 
5697 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5698 {
5699 	u32 status, timeout_usec;
5700 	int rc;
5701 
5702 	if (hdev->pldm || !hdev->pdev)
5703 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5704 	else
5705 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5706 
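	/* Kick a full STLB invalidation and wait for the STLB SRAM init to complete
	 * before programming the per-ASID hop0 addresses
	 */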
5707 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5708 
5709 	rc = hl_poll_timeout(
5710 		hdev,
5711 		stlb_base + STLB_SRAM_INIT_OFFSET,
5712 		status,
5713 		!status,
5714 		1000,
5715 		timeout_usec);
5716 
5717 	if (rc)
5718 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5719 
5720 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5721 	if (rc)
5722 		return rc;
5723 
5724 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5725 
5726 	rc = hl_poll_timeout(
5727 		hdev,
5728 		stlb_base + STLB_INV_ALL_START_OFFSET,
5729 		status,
5730 		!status,
5731 		1000,
5732 		timeout_usec);
5733 
5734 	if (rc)
5735 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5736 
5737 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5738 
5739 	return rc;
5740 }
5741 
5742 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5743 {
5744 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5745 	u32 mmu_base, stlb_base;
5746 	int rc;
5747 
5748 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5749 		return 0;
5750 
5751 	mmu_base = mmPMMU_HBW_MMU_BASE;
5752 	stlb_base = mmPMMU_HBW_STLB_BASE;
5753 
5754 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5755 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5756 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5757 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5758 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5759 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5760 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5761 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5762 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5763 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5764 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5765 
5766 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5767 
5768 	if (PAGE_SIZE == SZ_64K) {
5769 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5770 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5771 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5772 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5773 			FIELD_PREP(
5774 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5775 				1),
5776 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5777 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5778 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5779 	}
5780 
5781 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5782 
5783 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5784 	if (rc)
5785 		return rc;
5786 
5787 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5788 
5789 	return 0;
5790 }
5791 
5792 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5793 				int hmmu_id)
5794 {
5795 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5796 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5797 	u32 offset, mmu_base, stlb_base, hw_cap;
5798 	u8 dmmu_seq;
5799 	int rc;
5800 
5801 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5802 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5803 
5804 	/*
5805 	 * return if DMMU is already initialized or if it's not out of
5806 	 * isolation (due to cluster binning)
5807 	 */
5808 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5809 		return 0;
5810 
5811 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5812 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5813 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5814 
5815 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5816 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5817 
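	/* Configure the STLB hop lookup: first hop 0, first lookup hop 3 for both
	 * small and large pages, and hop 3 as the last/follower hop
	 */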
5818 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5819 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5820 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5821 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5822 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5823 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5824 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5825 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5826 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5827 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5828 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5829 
5830 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5831 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5832 
5833 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5834 
5835 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5836 	if (rc)
5837 		return rc;
5838 
5839 	gaudi2->hw_cap_initialized |= hw_cap;
5840 
5841 	return 0;
5842 }
5843 
5844 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5845 {
5846 	int rc, dcore_id, hmmu_id;
5847 
5848 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5849 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5850 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5851 			if (rc)
5852 				return rc;
5853 		}
5854 
5855 	return 0;
5856 }
5857 
5858 static int gaudi2_mmu_init(struct hl_device *hdev)
5859 {
5860 	int rc;
5861 
5862 	rc = gaudi2_pci_mmu_init(hdev);
5863 	if (rc)
5864 		return rc;
5865 
5866 	rc = gaudi2_hbm_mmu_init(hdev);
5867 	if (rc)
5868 		return rc;
5869 
5870 	return 0;
5871 }
5872 
5873 static int gaudi2_hw_init(struct hl_device *hdev)
5874 {
5875 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5876 	int rc;
5877 
5878 	/* Let's mark in the H/W that we have reached this point. We check
5879 	 * this value in the reset_before_init function to understand whether
5880 	 * we need to reset the chip before doing H/W init. This register is
5881 	 * cleared by the H/W upon H/W reset
5882 	 */
5883 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5884 
5885 	/* Perform read from the device to make sure device is up */
5886 	RREG32(mmHW_STATE);
5887 
5888 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5889 	 * So we set it here and if anyone tries to move it later to
5890 	 * a different address, there will be an error
5891 	 */
5892 	if (hdev->asic_prop.iatu_done_by_fw)
5893 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5894 
5895 	/*
5896 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5897 	 * base address of dram
5898 	 */
5899 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5900 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5901 		return -EIO;
5902 	}
5903 
5904 	rc = gaudi2_init_cpu(hdev);
5905 	if (rc) {
5906 		dev_err(hdev->dev, "failed to initialize CPU\n");
5907 		return rc;
5908 	}
5909 
5910 	gaudi2_init_scrambler_hbm(hdev);
5911 	gaudi2_init_kdma(hdev);
5912 
5913 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5914 	if (rc) {
5915 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5916 		return rc;
5917 	}
5918 
5919 	rc = gaudi2->cpucp_info_get(hdev);
5920 	if (rc) {
5921 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5922 		return rc;
5923 	}
5924 
5925 	rc = gaudi2_mmu_init(hdev);
5926 	if (rc)
5927 		return rc;
5928 
5929 	gaudi2_init_pdma(hdev);
5930 	gaudi2_init_edma(hdev);
5931 	gaudi2_init_sm(hdev);
5932 	gaudi2_init_tpc(hdev);
5933 	gaudi2_init_mme(hdev);
5934 	gaudi2_init_rotator(hdev);
5935 	gaudi2_init_dec(hdev);
5936 	gaudi2_enable_timestamp(hdev);
5937 
5938 	rc = gaudi2_coresight_init(hdev);
5939 	if (rc)
5940 		goto disable_queues;
5941 
5942 	rc = gaudi2_enable_msix(hdev);
5943 	if (rc)
5944 		goto disable_queues;
5945 
5946 	/* Perform read from the device to flush all configuration */
5947 	RREG32(mmHW_STATE);
5948 
5949 	return 0;
5950 
5951 disable_queues:
5952 	gaudi2_disable_dma_qmans(hdev);
5953 	gaudi2_disable_mme_qmans(hdev);
5954 	gaudi2_disable_tpc_qmans(hdev);
5955 	gaudi2_disable_rot_qmans(hdev);
5956 	gaudi2_disable_nic_qmans(hdev);
5957 
5958 	gaudi2_disable_timestamp(hdev);
5959 
5960 	return rc;
5961 }
5962 
5963 /**
5964  * gaudi2_send_hard_reset_cmd - common function to handle reset
5965  *
5966  * @hdev: pointer to the habanalabs device structure
5967  *
5968  * This function handles the various possible scenarios for reset.
 * It considers whether the reset is handled by the driver or the FW, and which FW components are loaded
5970  */
5971 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
5972 {
5973 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5974 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
5975 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5976 	u32 cpu_boot_status;
5977 
5978 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
5979 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
5980 
5981 	/*
5982 	 * Handle corner case where failure was at cpu management app load,
5983 	 * and driver didn't detect any failure while loading the FW,
5984 	 * then at such scenario driver will send only HALT_MACHINE
5985 	 * and no one will respond to this request since FW already back to preboot
5986 	 * and it cannot handle such cmd.
5987 	 * In this case next time the management app loads it'll check on events register
5988 	 * which will still have the halt indication, and will reboot the device.
5989 	 * The solution is to let preboot clear all relevant registers before next boot
5990 	 * once driver send COMMS_RST_DEV.
5991 	 */
5992 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
5993 
5994 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
5995 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
5996 		cpu_initialized = true;
5997 
5998 	/*
5999 	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
6000 	 * 1. FW reset: FW initiate the reset sequence
6001 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
6002 	 *                  reset but not the reset itself as it is not implemented
6003 	 *                  on their part) and LKD will wait to let FW complete the
6004 	 *                  sequence before issuing the reset
6005 	 */
6006 	if (!preboot_only && cpu_initialized) {
6007 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6008 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6009 
6010 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6011 	}
6012 
6013 	/*
6014 	 * When working with preboot (without Linux/Boot fit) we can
6015 	 * communicate only using the COMMS commands to issue halt/reset.
6016 	 *
6017 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6018 	 * attempt to revive the card in the small chance that the f/w has
6019 	 * experienced a watchdog event, which caused it to return back to preboot.
6020 	 * In that case, triggering reset through GIC won't help. We need to
6021 	 * trigger the reset as if Linux wasn't loaded.
6022 	 *
	 * We do it only if the reset cause was a heartbeat failure, as that is
	 * the indication of such an event.
	 *
	 * If the watchdog hasn't expired but we still got a heartbeat failure,
	 * this won't do any damage.
6028 	 */
6029 
6030 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
6031 		if (hdev->asic_prop.hard_reset_done_by_fw)
6032 			hl_fw_ask_hard_reset_without_linux(hdev);
6033 		else
6034 			hl_fw_ask_halt_machine_without_linux(hdev);
6035 	}
6036 }
6037 
6038 /**
6039  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6040  *
6041  * @hdev: pointer to the habanalabs device structure
6042  *
 * This function executes a hard reset, depending on whether the driver or the FW should perform it
6044  */
6045 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6046 {
6047 	if (hdev->asic_prop.hard_reset_done_by_fw) {
6048 		gaudi2_send_hard_reset_cmd(hdev);
6049 		return;
6050 	}
6051 
6052 	/* Set device to handle FLR by H/W as we will put the device
6053 	 * CPU to halt mode
6054 	 */
6055 	WREG32(mmPCIE_AUX_FLR_CTRL,
6056 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6057 
6058 	gaudi2_send_hard_reset_cmd(hdev);
6059 
6060 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6061 }
6062 
6063 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6064 {
6065 	int i, rc = 0;
6066 	u32 reg_val;
6067 
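	/* Poll up to GAUDI2_RESET_POLL_CNT times for the FW's soft-reset-done indication */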
6068 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6069 		rc = hl_poll_timeout(
6070 			hdev,
6071 			mmCPU_RST_STATUS_TO_HOST,
6072 			reg_val,
6073 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6074 			1000,
6075 			poll_timeout_us);
6076 
6077 	if (rc)
6078 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6079 				reg_val);
6080 	return rc;
6081 }
6082 
6083 /**
6084  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6085  *
6086  * @hdev: pointer to the habanalabs device structure
6087  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6088  * @poll_timeout_us: time to wait for response from f/w.
6089  *
 * This function executes a soft reset, depending on whether the driver or the FW should perform it
6091  */
6092 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6093 						u32 poll_timeout_us)
6094 {
6095 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6096 
6097 	if (!driver_performs_reset) {
6098 		/* set SP to indicate reset request sent to FW */
6099 		if (dyn_regs->cpu_rst_status)
6100 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6101 		else
6102 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6103 
6104 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6105 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6106 
6107 		return gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6108 	}
6109 
	/* Block access to engines, QMANs and SM during reset; these
	 * RRs will be reconfigured after the soft reset.
	 * PCIE_MSIX is left unsecured to allow NIC packet processing during the reset.
	 */
6114 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6115 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6116 
6117 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6118 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6119 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6120 
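	/* Trigger the SW soft reset */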
6121 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6122 	return 0;
6123 }
6124 
6125 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6126 {
6127 	int i, rc = 0;
6128 	u32 reg_val;
6129 
	/* We poll the BTM done indication multiple times after reset due to
	 * HW erratum 'GAUDI2_0300'
	 */
6133 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6134 		rc = hl_poll_timeout(
6135 			hdev,
6136 			mmPSOC_GLOBAL_CONF_BTM_FSM,
6137 			reg_val,
6138 			reg_val == 0,
6139 			1000,
6140 			poll_timeout_us);
6141 
6142 	if (rc)
6143 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6144 }
6145 
6146 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6147 {
6148 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6149 	u32 poll_timeout_us, reset_sleep_ms;
6150 	bool driver_performs_reset = false;
6151 	int rc;
6152 
6153 	if (hdev->pldm) {
6154 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6155 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6156 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6157 	} else {
6158 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6159 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6160 	}
6161 
6162 	if (fw_reset)
6163 		goto skip_reset;
6164 
6165 	gaudi2_reset_arcs(hdev);
6166 
6167 	if (hard_reset) {
6168 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6169 		gaudi2_execute_hard_reset(hdev);
6170 	} else {
6171 		/*
6172 		 * As we have to support also work with preboot only (which does not supports
6173 		 * soft reset) we have to make sure that security is disabled before letting driver
6174 		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
6175 		 * secured device with preboot only.
6176 		 */
6177 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6178 							!hdev->asic_prop.fw_security_enabled);
6179 		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6180 		if (rc)
6181 			return rc;
6182 	}
6183 
6184 skip_reset:
6185 	if (driver_performs_reset || hard_reset) {
6186 		/*
6187 		 * Instead of waiting for BTM indication we should wait for preboot ready:
6188 		 * Consider the below scenario:
6189 		 * 1. FW update is being triggered
6190 		 *        - setting the dirty bit
6191 		 * 2. hard reset will be triggered due to the dirty bit
6192 		 * 3. FW initiates the reset:
6193 		 *        - dirty bit cleared
6194 		 *        - BTM indication cleared
6195 		 *        - preboot ready indication cleared
6196 		 * 4. during hard reset:
6197 		 *        - BTM indication will be set
6198 		 *        - BIST test performed and another reset triggered
6199 		 * 5. only after this reset the preboot will set the preboot ready
6200 		 *
		 * When polling on the BTM indication alone, we can lose sync with a FW
		 * that is in the middle of a reset while trying to communicate with it.
		 * To overcome this, we always wait for the preboot-ready indication.
6204 		 */
6205 
6206 		/* without this sleep reset will not work */
6207 		msleep(reset_sleep_ms);
6208 
6209 		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6210 			hl_fw_wait_preboot_ready(hdev);
6211 		else
6212 			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6213 	}
6214 
6215 	if (!gaudi2)
6216 		return 0;
6217 
6218 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6219 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6220 
6221 	/*
6222 	 * Clear NIC capability mask in order for driver to re-configure
6223 	 * NIC QMANs. NIC ports will not be re-configured during soft
6224 	 * reset as we call gaudi2_nic_init only during hard reset
6225 	 */
6226 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6227 
6228 	if (hard_reset) {
6229 		gaudi2->hw_cap_initialized &=
6230 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6231 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6232 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6233 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6234 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6235 
6236 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6237 	} else {
6238 		gaudi2->hw_cap_initialized &=
6239 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6240 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6241 			HW_CAP_ROT_MASK);
6242 	}
6243 	return 0;
6244 }
6245 
6246 static int gaudi2_suspend(struct hl_device *hdev)
6247 {
6248 	int rc;
6249 
6250 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6251 	if (rc)
6252 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6253 
6254 	return rc;
6255 }
6256 
6257 static int gaudi2_resume(struct hl_device *hdev)
6258 {
6259 	return gaudi2_init_iatu(hdev);
6260 }
6261 
6262 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6263 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
6264 {
6265 	int rc;
6266 
6267 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6268 			VM_DONTCOPY | VM_NORESERVE);
6269 
6270 #ifdef _HAS_DMA_MMAP_COHERENT
6271 
6272 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6273 	if (rc)
6274 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6275 
6276 #else
6277 
6278 	rc = remap_pfn_range(vma, vma->vm_start,
6279 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6280 				size, vma->vm_page_prot);
6281 	if (rc)
6282 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6283 
6284 #endif
6285 
6286 	return rc;
6287 }
6288 
6289 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6290 {
6291 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6292 	u64 hw_cap_mask = 0;
6293 	u64 hw_tpc_cap_bit = 0;
6294 	u64 hw_nic_cap_bit = 0;
6295 	u64 hw_test_cap_bit = 0;
6296 
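	/* Each engine exposes 4 consecutive PQs, so dividing the queue-id offset
	 * by 4 yields the engine's capability bit index
	 */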
6297 	switch (hw_queue_id) {
6298 	case GAUDI2_QUEUE_ID_PDMA_0_0:
6299 	case GAUDI2_QUEUE_ID_PDMA_0_1:
6300 	case GAUDI2_QUEUE_ID_PDMA_1_0:
6301 		hw_cap_mask = HW_CAP_PDMA_MASK;
6302 		break;
6303 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6304 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6305 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6306 		break;
6307 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6308 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6309 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6310 		break;
6311 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6312 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6313 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6314 		break;
6315 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6316 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6317 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6318 		break;
6319 
6320 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6321 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
6322 		break;
6323 
6324 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6325 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6326 		break;
6327 
6328 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6329 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6330 		break;
6331 
6332 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6333 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6334 		break;
6335 
6336 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6337 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6338 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6339 
6340 		/* special case where cap bit refers to the first queue id */
6341 		if (!hw_tpc_cap_bit)
6342 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6343 		break;
6344 
6345 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6346 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6347 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6348 		break;
6349 
6350 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6351 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6352 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6353 		break;
6354 
6355 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6356 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6357 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6358 		break;
6359 
6360 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6361 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6362 		break;
6363 
6364 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6365 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6366 		break;
6367 
6368 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6369 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6370 
6371 		/* special case where cap bit refers to the first queue id */
6372 		if (!hw_nic_cap_bit)
6373 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6374 		break;
6375 
6376 	case GAUDI2_QUEUE_ID_CPU_PQ:
6377 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6378 
6379 	default:
6380 		return false;
6381 	}
6382 
	if (hw_tpc_cap_bit)
		return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));

	if (hw_nic_cap_bit)
		return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6388 
6389 	if (hw_test_cap_bit)
6390 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6391 
6392 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6393 }
6394 
6395 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6396 {
6397 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6398 
6399 	switch (arc_id) {
6400 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6401 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6402 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6403 
6404 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6405 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6406 
6407 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6408 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6409 
6410 	default:
6411 		return false;
6412 	}
6413 }
6414 
6415 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6416 {
6417 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6418 
6419 	switch (arc_id) {
6420 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6421 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6422 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6423 		break;
6424 
6425 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6426 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6427 		break;
6428 
6429 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6430 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6431 		break;
6432 
6433 	default:
6434 		return;
6435 	}
6436 }
6437 
6438 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6439 {
6440 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6441 
6442 	switch (arc_id) {
6443 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6444 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6445 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6446 		break;
6447 
6448 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6449 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6450 		break;
6451 
6452 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6453 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6454 		break;
6455 
6456 	default:
6457 		return;
6458 	}
6459 }
6460 
6461 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6462 {
6463 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6464 	u32 pq_offset, reg_base, db_reg_offset, db_value;
6465 
6466 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6467 		/*
6468 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6469 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6470 		 * number.
6471 		 */
6472 		pq_offset = (hw_queue_id & 0x3) * 4;
6473 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6474 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6475 	} else {
6476 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
6477 	}
6478 
6479 	db_value = pi;
6480 
6481 	/* ring the doorbell */
6482 	WREG32(db_reg_offset, db_value);
6483 
6484 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6485 		/* make sure device CPU will read latest data from host */
6486 		mb();
6487 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6488 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6489 	}
6490 }
6491 
6492 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6493 {
6494 	__le64 *pbd = (__le64 *) bd;
6495 
	/* The QMANs are on the host memory, so a simple copy suffices */
6497 	pqe[0] = pbd[0];
6498 	pqe[1] = pbd[1];
6499 }
6500 
6501 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6502 				dma_addr_t *dma_handle, gfp_t flags)
6503 {
6504 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6505 }
6506 
6507 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6508 				void *cpu_addr, dma_addr_t dma_handle)
6509 {
6510 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6511 }
6512 
6513 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6514 				u32 timeout, u64 *result)
6515 {
6516 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6517 
6518 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6519 		if (result)
6520 			*result = 0;
6521 		return 0;
6522 	}
6523 
6524 	if (!timeout)
6525 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6526 
6527 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6528 }
6529 
6530 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6531 				gfp_t mem_flags, dma_addr_t *dma_handle)
6532 {
6533 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6534 		return NULL;
6535 
6536 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6537 }
6538 
6539 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6540 {
6541 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6542 }
6543 
6544 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6545 						dma_addr_t *dma_handle)
6546 {
6547 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6548 }
6549 
6550 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6551 {
6552 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6553 }
6554 
6555 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6556 					enum dma_data_direction dir)
6557 {
6558 	dma_addr_t dma_addr;
6559 
6560 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6561 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6562 		return 0;
6563 
6564 	return dma_addr;
6565 }
6566 
6567 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6568 					enum dma_data_direction dir)
6569 {
6570 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6571 }
6572 
6573 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6574 {
6575 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6576 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6577 
6578 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6579 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6580 		return -EINVAL;
6581 	}
6582 
6583 	/* Just check if CB address is valid */
6584 
6585 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6586 					parser->user_cb_size,
6587 					asic_prop->sram_user_base_address,
6588 					asic_prop->sram_end_address))
6589 		return 0;
6590 
6591 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6592 					parser->user_cb_size,
6593 					asic_prop->dram_user_base_address,
6594 					asic_prop->dram_end_address))
6595 		return 0;
6596 
6597 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6598 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6599 						parser->user_cb_size,
6600 						asic_prop->dmmu.start_addr,
6601 						asic_prop->dmmu.end_addr))
6602 		return 0;
6603 
6604 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6605 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6606 					parser->user_cb_size,
6607 					asic_prop->pmmu.start_addr,
6608 					asic_prop->pmmu.end_addr) ||
6609 			hl_mem_area_inside_range(
6610 					(u64) (uintptr_t) parser->user_cb,
6611 					parser->user_cb_size,
6612 					asic_prop->pmmu_huge.start_addr,
6613 					asic_prop->pmmu_huge.end_addr))
6614 			return 0;
6615 
6616 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
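		/* A host physical address is acceptable when there is no PCI device
		 * or no IOMMU translating the device's accesses
		 */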
6617 		if (!hdev->pdev)
6618 			return 0;
6619 
6620 		if (!device_iommu_mapped(&hdev->pdev->dev))
6621 			return 0;
6622 	}
6623 
6624 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6625 		parser->user_cb, parser->user_cb_size);
6626 
6627 	return -EFAULT;
6628 }
6629 
6630 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6631 {
6632 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6633 
6634 	if (!parser->is_kernel_allocated_cb)
6635 		return gaudi2_validate_cb_address(hdev, parser);
6636 
6637 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6638 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6639 		return -EINVAL;
6640 	}
6641 
6642 	return 0;
6643 }
6644 
6645 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6646 {
6647 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6648 
6649 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6650 		return 0;
6651 
6652 	return hl_fw_send_heartbeat(hdev);
6653 }
6654 
/* This is an internal helper function, used to update the KDMA MMU properties.
 * Should be called while holding the KDMA lock.
 */
6658 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6659 					   bool mmu_bypass, u32 asid)
6660 {
6661 	u32 rw_asid, rw_mmu_bp;
6662 
6663 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6664 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6665 
6666 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6667 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6668 
6669 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6670 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6671 }
6672 
6673 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6674 						u32 mon_payload, u32 sync_value)
6675 {
6676 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6677 	u8 mask;
6678 
6679 	sob_offset = sob_id * 4;
6680 	mon_offset = mon_id * 4;
6681 
6682 	/* Reset the SOB value */
6683 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6684 
6685 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
6686 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6687 
6688 	/* Configure this address with CS index because CQ_EN is set */
6689 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6690 
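	/* Monitors watch a group of 8 SOBs; select the group, unmask only our SOB
	 * and arm an "equal to sync_value" comparison
	 */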
6691 	sync_group_id = sob_id / 8;
6692 	mask = ~(1 << (sob_id & 0x7));
6693 	mode = 1; /* comparison mode is "equal to" */
6694 
6695 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6696 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6697 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6698 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6699 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6700 }
6701 
/* Submit a copy/memset job to the KDMA and wait for its completion */
6703 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6704 					u64 src_addr, u64 dst_addr,
6705 					u32 size, bool is_memset)
6706 {
6707 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6708 	struct hl_cq_entry *cq_base;
6709 	struct hl_cq *cq;
6710 	u64 comp_addr;
6711 	int rc;
6712 
6713 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6714 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6715 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6716 
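	/* On completion, the KDMA writes comp_val to the reserved SOB; the armed
	 * monitor then posts an entry to the reserved CQ, which is polled below
	 */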
6717 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6718 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6719 
6720 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6721 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6722 
6723 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6724 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6725 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6726 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6727 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6728 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6729 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6730 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6731 
6732 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6733 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6734 
6735 	if (is_memset)
6736 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6737 
6738 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6739 
6740 	/* Wait for completion */
6741 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6742 	cq_base = cq->kernel_address;
6743 	polling_addr = (u32 *)&cq_base[cq->ci];
6744 
6745 	if (hdev->pldm)
		/* 20 seconds of timeout for each 1MB of transfer size */
6747 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6748 	else
6749 		timeout = KDMA_TIMEOUT_USEC;
6750 
6751 	/* Polling */
6752 	rc = hl_poll_timeout_memory(
6753 			hdev,
6754 			polling_addr,
6755 			status,
6756 			(status == 1),
6757 			1000,
6758 			timeout,
6759 			true);
6760 
6761 	*polling_addr = 0;
6762 
6763 	if (rc) {
6764 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6765 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6766 		return rc;
6767 	}
6768 
6769 	cq->ci = hl_cq_inc_ptr(cq->ci);
6770 
6771 	return 0;
6772 }
6773 
6774 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6775 {
6776 	u32 i;
6777 
6778 	for (i = 0 ; i < size ; i += sizeof(u32))
6779 		WREG32(addr + i, val);
6780 }
6781 
6782 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6783 {
6784 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6785 
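	/* Test mode makes the QMAN trusted for testing and disables its PQC;
	 * normal mode restores the trusted protection value and re-enables the PQC
	 */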
6786 	if (enable) {
6787 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6788 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6789 	} else {
6790 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6791 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6792 	}
6793 }
6794 
6795 static int gaudi2_test_queue(struct hl_device *hdev, u32 hw_queue_id)
6796 {
6797 	u32 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
6798 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6799 	u32 timeout_usec, tmp, sob_base = 1, sob_val = 0x5a5a;
6800 	struct packet_msg_short *msg_short_pkt;
6801 	dma_addr_t pkt_dma_addr;
6802 	size_t pkt_size;
6803 	int rc;
6804 
6805 	if (hdev->pldm)
6806 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6807 	else
6808 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6809 
6810 	pkt_size = sizeof(*msg_short_pkt);
6811 	msg_short_pkt = hl_asic_dma_pool_zalloc(hdev, pkt_size, GFP_KERNEL, &pkt_dma_addr);
6812 	if (!msg_short_pkt) {
6813 		dev_err(hdev->dev, "Failed to allocate packet for H/W queue %d testing\n",
6814 			hw_queue_id);
6815 		return -ENOMEM;
6816 	}
6817 
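	/* Build a MSG_SHORT packet that writes sob_val to the selected SOB (EB and MB set) */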
6818 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6819 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6820 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6821 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6822 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6823 
6824 	msg_short_pkt->value = cpu_to_le32(sob_val);
6825 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6826 
6827 	/* Reset the SOB value */
6828 	WREG32(sob_addr, 0);
6829 
6830 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
6831 	if (rc) {
6832 		dev_err(hdev->dev, "Failed to send msg_short packet to H/W queue %d\n",
6833 			hw_queue_id);
6834 		goto free_pkt;
6835 	}
6836 
6837 	rc = hl_poll_timeout(
6838 			hdev,
6839 			sob_addr,
6840 			tmp,
6841 			(tmp == sob_val),
6842 			1000,
6843 			timeout_usec);
6844 
6845 	if (rc == -ETIMEDOUT) {
6846 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6847 			hw_queue_id, tmp);
6848 		rc = -EIO;
6849 	}
6850 
6851 	/* Reset the SOB value */
6852 	WREG32(sob_addr, 0);
6853 
6854 free_pkt:
6855 	hl_asic_dma_pool_free(hdev, (void *) msg_short_pkt, pkt_dma_addr);
6856 	return rc;
6857 }
6858 
6859 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6860 {
6861 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6862 
6863 	/*
6864 	 * check capability here as send_cpu_message() won't update the result
6865 	 * value if no capability
6866 	 */
6867 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6868 		return 0;
6869 
6870 	return hl_fw_test_cpu_queue(hdev);
6871 }
6872 
6873 static int gaudi2_test_queues(struct hl_device *hdev)
6874 {
6875 	int i, rc, ret_val = 0;
6876 
6877 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6878 		if (!gaudi2_is_queue_enabled(hdev, i))
6879 			continue;
6880 
6881 		gaudi2_qman_set_test_mode(hdev, i, true);
6882 		rc = gaudi2_test_queue(hdev, i);
6883 		gaudi2_qman_set_test_mode(hdev, i, false);
6884 
6885 		if (rc) {
6886 			ret_val = -EINVAL;
6887 			goto done;
6888 		}
6889 	}
6890 
6891 	rc = gaudi2_test_cpu_queue(hdev);
6892 	if (rc) {
6893 		ret_val = -EINVAL;
6894 		goto done;
6895 	}
6896 
6897 done:
6898 	return ret_val;
6899 }
6900 
6901 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6902 {
6903 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6904 	size_t irq_arr_size;
6905 	int rc;
6906 
6907 	gaudi2_init_arcs(hdev);
6908 
6909 	rc = gaudi2_scrub_arcs_dccm(hdev);
6910 	if (rc) {
6911 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
6912 		return rc;
6913 	}
6914 
6915 	gaudi2_init_security(hdev);
6916 
6917 	/* Unmask all IRQs since some could have been received during the soft reset */
6918 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6919 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6920 }
6921 
6922 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6923 		struct engines_data *e)
6924 {
6925 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
6926 	struct asic_fixed_properties *prop = &hdev->asic_prop;
6927 	unsigned long *mask = (unsigned long *) mask_arr;
6928 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
6929 	bool is_idle = true, is_eng_idle;
6930 	int engine_idx, i, j;
6931 	u64 offset;
6932 
6933 	if (e)
6934 		hl_engine_data_sprintf(e,
6935 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
6936 			"----  ----  -------  ------------  -------------  -------------\n");
6937 
6938 	for (i = 0; i < NUM_OF_DCORES; i++) {
6939 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
6940 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
6941 
6942 			if (!(prop->edma_enabled_mask & BIT(seq)))
6943 				continue;
6944 
6945 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
6946 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
6947 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
6948 
6949 			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
6950 			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
6951 
6952 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
6953 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
6954 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
6955 
6956 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6957 					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
6958 			is_idle &= is_eng_idle;
6959 
6960 			if (mask && !is_eng_idle)
6961 				set_bit(engine_idx, mask);
6962 
6963 			if (e)
6964 				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
6965 							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
6966 		}
6967 	}
6968 
6969 	return is_idle;
6970 }
6971 
6972 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6973 		struct engines_data *e)
6974 {
6975 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
6976 	unsigned long *mask = (unsigned long *) mask_arr;
6977 	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
6978 	bool is_idle = true, is_eng_idle;
6979 	int engine_idx, i;
6980 	u64 offset;
6981 
6982 	if (e)
6983 		hl_engine_data_sprintf(e,
6984 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
6985 					"----  -------  ------------  -------------  -------------\n");
6986 
6987 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
6988 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
6989 		offset = i * PDMA_OFFSET;
6990 		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
6991 		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
6992 
6993 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
6994 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
6995 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
6996 
6997 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
6998 				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
6999 		is_idle &= is_eng_idle;
7000 
7001 		if (mask && !is_eng_idle)
7002 			set_bit(engine_idx, mask);
7003 
7004 		if (e)
7005 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7006 						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7007 	}
7008 
7009 	return is_idle;
7010 }
7011 
7012 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7013 		struct engines_data *e)
7014 {
7015 	unsigned long *mask = (unsigned long *) mask_arr;
7016 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7017 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7018 	bool is_idle = true, is_eng_idle;
7019 	int engine_idx, i;
7020 	u64 offset = 0;
7021 
7022 	/* NIC, twelve macros in Full chip */
7023 	if (e && hdev->nic_ports_mask)
7024 		hl_engine_data_sprintf(e,
7025 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7026 					"---  -------  ------------  ----------\n");
7027 
7028 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
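		/* Each NIC macro hosts two engines: an even engine index starts a new
		 * macro offset and an odd one adds the intra-macro QM offset
		 */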
7029 		if (!(i & 1))
7030 			offset = i / 2 * NIC_OFFSET;
7031 		else
7032 			offset += NIC_QM_OFFSET;
7033 
7034 		if (!(hdev->nic_ports_mask & BIT(i)))
7035 			continue;
7036 
		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;

7040 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7041 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7042 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7043 
7044 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7045 		is_idle &= is_eng_idle;
7046 
7047 		if (mask && !is_eng_idle)
7048 			set_bit(engine_idx, mask);
7049 
7050 		if (e)
7051 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7052 						qm_glbl_sts0, qm_cgm_sts);
7053 	}
7054 
7055 	return is_idle;
7056 }
7057 
7058 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7059 		struct engines_data *e)
7060 {
7061 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7062 	unsigned long *mask = (unsigned long *) mask_arr;
7063 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7064 	bool is_idle = true, is_eng_idle;
7065 	int engine_idx, i;
7066 	u64 offset;
7067 
7068 	if (e)
7069 		hl_engine_data_sprintf(e,
7070 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7071 					"---  ----  -------  ------------  ---------------\n");
7072 	/* MME, one per Dcore */
7073 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7074 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7075 		offset = i * DCORE_OFFSET;
7076 
7077 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7078 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7079 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7080 
7081 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7082 		is_idle &= is_eng_idle;
7083 
7084 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7085 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7086 		is_idle &= is_eng_idle;
7087 
7088 		if (e)
7089 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
7090 				is_eng_idle ? "Y" : "N",
7091 				qm_glbl_sts0,
7092 				mme_arch_sts);
7093 
7094 		if (mask && !is_eng_idle)
7095 			set_bit(engine_idx, mask);
7096 	}
7097 
7098 	return is_idle;
7099 }
7100 
7101 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7102 					struct iterate_module_ctx *ctx)
7103 {
7104 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7105 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7106 	bool is_eng_idle;
7107 	int engine_idx;
7108 
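	/* DCORE0's last TPC instance is the dedicated TPC6 engine, which has its own engine ID */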
7109 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7110 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7111 	else
7112 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7113 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7114 
7115 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7116 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7117 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7118 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7119 
7120 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7121 						IS_TPC_IDLE(tpc_cfg_sts);
7122 	*(idle_data->is_idle) &= is_eng_idle;
7123 
7124 	if (idle_data->mask && !is_eng_idle)
7125 		set_bit(engine_idx, idle_data->mask);
7126 
7127 	if (idle_data->e)
7128 		hl_engine_data_sprintf(idle_data->e,
7129 					idle_data->tpc_fmt, dcore, inst,
7130 					is_eng_idle ? "Y" : "N",
7131 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7132 }
7133 
7134 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7135 		struct engines_data *e)
7136 {
7137 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7138 	unsigned long *mask = (unsigned long *) mask_arr;
7139 	bool is_idle = true;
7140 
7141 	struct gaudi2_tpc_idle_data tpc_idle_data = {
7142 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7143 		.e = e,
7144 		.mask = mask,
7145 		.is_idle = &is_idle,
7146 	};
7147 	struct iterate_module_ctx tpc_iter = {
7148 		.fn = &gaudi2_is_tpc_engine_idle,
7149 		.data = &tpc_idle_data,
7150 	};
7151 
7152 	if (e && prop->tpc_enabled_mask)
7153 		hl_engine_data_sprintf(e,
7154 			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7155 			"----  ---  -------  ------------  ----------  ------\n");
7156 
7157 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7158 
	return *tpc_idle_data.is_idle;
7160 }
7161 
7162 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7163 		struct engines_data *e)
7164 {
7165 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7166 	unsigned long *mask = (unsigned long *) mask_arr;
7167 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7168 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7169 	bool is_idle = true, is_eng_idle;
7170 	u32 dec_swreg15, dec_enabled_bit;
7171 	int engine_idx, i, j;
7172 	u64 offset;
7173 
	/* Decoders, two per Dcore and two shared PCIe decoders */
7175 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7176 		hl_engine_data_sprintf(e,
7177 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7178 			"----  ---  -------  ---------------\n");
7179 
7180 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7181 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7182 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7183 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7184 				continue;
7185 
7186 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7187 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7188 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7189 
7190 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7191 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7192 			is_idle &= is_eng_idle;
7193 
7194 			if (mask && !is_eng_idle)
7195 				set_bit(engine_idx, mask);
7196 
7197 			if (e)
7198 				hl_engine_data_sprintf(e, dec_fmt, i, j,
7199 							is_eng_idle ? "Y" : "N", dec_swreg15);
7200 		}
7201 	}
7202 
7203 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7204 		hl_engine_data_sprintf(e,
7205 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7206 			"--------  -------  ---------------\n");
7207 
	/* Check the shared (PCIe) decoders */
7209 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7210 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
7211 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7212 			continue;
7213 
7214 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7215 		offset = i * DCORE_DEC_OFFSET;
7216 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7217 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7218 		is_idle &= is_eng_idle;
7219 
7220 		if (mask && !is_eng_idle)
7221 			set_bit(engine_idx, mask);
7222 
7223 		if (e)
7224 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7225 						is_eng_idle ? "Y" : "N", dec_swreg15);
7226 	}
7227 
7228 	return is_idle;
7229 }
7230 
7231 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7232 		struct engines_data *e)
7233 {
7234 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7235 	unsigned long *mask = (unsigned long *) mask_arr;
7236 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7237 	bool is_idle = true, is_eng_idle;
7238 	int engine_idx, i;
7239 	u64 offset;
7240 
7241 	if (e)
7242 		hl_engine_data_sprintf(e,
7243 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7244 			"----  ---  -------  ------------  ------------  ----------\n");
7245 
7246 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7247 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7248 
7249 		offset = i * ROT_OFFSET;
7250 
7251 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7252 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7253 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7254 
7255 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7256 		is_idle &= is_eng_idle;
7257 
7258 		if (mask && !is_eng_idle)
7259 			set_bit(engine_idx, mask);
7260 
7261 		if (e)
7262 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7263 						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7264 	}
7265 
7266 	return is_idle;
7267 }
7268 
7269 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7270 					struct engines_data *e)
7271 {
7272 	bool is_idle = true;
7273 
7274 	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7275 	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7276 	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7277 	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7278 	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7279 	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7280 	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7281 
7282 	return is_idle;
7283 }
7284 
7285 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7286 	__acquires(&gaudi2->hw_queues_lock)
7287 {
7288 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7289 
7290 	spin_lock(&gaudi2->hw_queues_lock);
7291 }
7292 
7293 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7294 	__releases(&gaudi2->hw_queues_lock)
7295 {
7296 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7297 
7298 	spin_unlock(&gaudi2->hw_queues_lock);
7299 }
7300 
7301 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7302 {
7303 	return hdev->pdev->device;
7304 }
7305 
7306 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7307 {
7308 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7309 
7310 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7311 		return 0;
7312 
7313 	return hl_fw_get_eeprom_data(hdev, data, max_size);
7314 }
7315 
7316 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7317 {
7318 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7319 }
7320 
7321 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7322 {
7323 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7324 
7325 	if (aggregate) {
7326 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
7327 		return gaudi2->events_stat_aggregate;
7328 	}
7329 
7330 	*size = (u32) sizeof(gaudi2->events_stat);
7331 	return gaudi2->events_stat;
7332 }
7333 
7334 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7335 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7336 {
7337 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7338 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
7339 
7340 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7341 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7342 
7343 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7344 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7345 
7346 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7347 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7348 
7349 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7350 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7351 
7352 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7353 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7354 }
7355 
7356 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7357 {
7358 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7359 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7360 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7361 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
7362 	u32 vdec_id, i, ports_offset, reg_val;
7363 	u8 edma_seq_base;
7364 
7365 	/* EDMA */
7366 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7367 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7368 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7369 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7370 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7371 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7372 	}
7373 
7374 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7375 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7376 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7377 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7378 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7379 	}
7380 
7381 	/* Sync Mngr */
7382 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7383 	/*
7384 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7385 	 * for any access type
7386 	 */
7387 	if (dcore_id > 0) {
7388 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7389 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7390 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7391 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7392 	}
7393 
7394 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7395 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7396 
7397 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7398 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
7399 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7400 				dcore_offset + ports_offset, 0);
7401 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7402 				dcore_offset + ports_offset, rw_asid);
7403 	}
7404 
7405 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7406 		ports_offset = i * DCORE_MME_WB_OFFSET;
7407 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7408 				dcore_offset + ports_offset, 0);
7409 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7410 				dcore_offset + ports_offset, rw_asid);
7411 	}
7412 
7413 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7414 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7415 
7416 	/*
7417 	 * Decoders
7418 	 */
7419 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7420 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7421 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7422 	}
7423 }
7424 
7425 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7426 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7427 {
7428 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7429 
7430 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7431 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7432 
7433 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7434 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7435 
7436 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7437 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7438 
7439 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7440 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7441 
7442 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7443 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7444 }
7445 
7446 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7447 							u32 rw_asid, u32 rw_mmu_bp)
7448 {
7449 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7450 
7451 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7452 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7453 }
7454 
7455 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7456 {
7457 	u32 reg_base, reg_offset, reg_val = 0;
7458 
7459 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
7460 
7461 	/* Enable MMU and configure asid for all relevant ARC regions */
7462 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7463 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7464 
7465 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7466 	WREG32(reg_base + reg_offset, reg_val);
7467 
7468 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7469 	WREG32(reg_base + reg_offset, reg_val);
7470 
7471 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7472 	WREG32(reg_base + reg_offset, reg_val);
7473 
7474 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7475 	WREG32(reg_base + reg_offset, reg_val);
7476 
7477 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7478 	WREG32(reg_base + reg_offset, reg_val);
7479 
7480 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7481 	WREG32(reg_base + reg_offset, reg_val);
7482 
7483 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7484 	WREG32(reg_base + reg_offset, reg_val);
7485 
7486 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7487 	WREG32(reg_base + reg_offset, reg_val);
7488 
7489 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7490 	WREG32(reg_base + reg_offset, reg_val);
7491 
7492 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7493 	WREG32(reg_base + reg_offset, reg_val);
7494 
7495 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7496 	WREG32(reg_base + reg_offset, reg_val);
7497 }
7498 
7499 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7500 {
7501 	int i;
7502 
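	/* When the boot CPU FW is loaded, let CPUCP set the ASID for all engine cores */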
7503 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7504 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7505 
7506 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7507 		gaudi2_arc_mmu_prepare(hdev, i, asid);
7508 
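	/* Queue IDs are grouped in 4 streams per QMAN, so checking one queue per group is enough */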
7509 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7510 		if (!gaudi2_is_queue_enabled(hdev, i))
7511 			continue;
7512 
7513 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7514 	}
7515 
7516 	return 0;
7517 }
7518 
7519 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7520 {
7521 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7522 	u32 rw_asid, offset;
7523 	int rc, i;
7524 
7525 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7526 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7527 
7528 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7529 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7530 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7531 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7532 
7533 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7534 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7535 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7536 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7537 
7538 	/* ROT */
7539 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7540 		offset = i * ROT_OFFSET;
7541 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7542 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7543 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7544 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7545 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7546 	}
7547 
7548 	/* Shared Decoders are the last bits in the decoders mask */
7549 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7550 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7551 
7552 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7553 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7554 
7555 	/* arc farm arc dup eng */
7556 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7557 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7558 
7559 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7560 	if (rc)
7561 		return rc;
7562 
7563 	return 0;
7564 }
7565 
static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7567 					struct iterate_module_ctx *ctx)
7568 {
7569 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7570 
7571 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7572 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7573 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7574 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7575 }
7576 
7577 /* zero the MMUBP and set the ASID */
7578 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7579 {
7580 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7581 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
7582 	struct iterate_module_ctx tpc_iter = {
7583 		.fn = &gaudi2_tpc_mmu_prepare,
7584 		.data = &tpc_mmu_data,
7585 	};
7586 	int rc, i;
7587 
7588 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7589 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
7590 		return -EINVAL;
7591 	}
7592 
7593 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7594 		return 0;
7595 
7596 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
7597 	if (rc)
7598 		return rc;
7599 
7600 	/* configure DCORE MMUs */
7601 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7602 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7603 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7604 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
7605 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
7606 
7607 	return 0;
7608 }
7609 
7610 static inline bool is_info_event(u32 event)
7611 {
7612 	switch (event) {
7613 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7614 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7615 
	/* Return in case of a NIC status event - these events are received periodically and are
	 * not an indication of an error.
	 */
7619 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7620 		return true;
7621 	default:
7622 		return false;
7623 	}
7624 }
7625 
7626 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7627 			bool ratelimited, const char *fmt, ...)
7628 {
7629 	struct va_format vaf;
7630 	va_list args;
7631 
7632 	va_start(args, fmt);
7633 	vaf.fmt = fmt;
7634 	vaf.va = &args;
7635 
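	/* %pV prints the caller's format string with its arguments (struct va_format) */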
7636 	if (ratelimited)
7637 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7638 			gaudi2_irq_map_table[event_type].valid ?
7639 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7640 	else
7641 		dev_err(hdev->dev, "%s: %pV\n",
7642 			gaudi2_irq_map_table[event_type].valid ?
7643 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7644 
7645 	va_end(args);
7646 }
7647 
7648 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7649 		struct hl_eq_ecc_data *ecc_data)
7650 {
7651 	u64 ecc_address = 0, ecc_syndrom = 0;
7652 	u8 memory_wrapper_idx = 0;
7653 
7654 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7655 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7656 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7657 
7658 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7659 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7660 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7661 
7662 	return !!ecc_data->is_critical;
7663 }
7664 
7665 /*
7666  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7667  *
7668  * @idx: the current pi/ci value
7669  * @q_len: the queue length (power of 2)
7670  *
7671  * @return the cyclically decremented index
7672  */
7673 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
7674 {
7675 	u32 mask = q_len - 1;
7676 
7677 	/*
7678 	 * modular decrement is equivalent to adding (queue_size -1)
7679 	 * later we take LSBs to make sure the value is in the
7680 	 * range [0, queue_len - 1]
7681 	 */
7682 	return (idx + q_len - 1) & mask;
7683 }
7684 
7685 /**
7686  * gaudi2_print_sw_config_stream_data - print SW config stream data
7687  *
7688  * @hdev: pointer to the habanalabs device structure
7689  * @stream: the QMAN's stream
7690  * @qman_base: base address of QMAN registers block
7691  */
7692 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7693 						u32 stream, u64 qman_base)
7694 {
7695 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7696 	u32 cq_ptr_lo_off, size;
7697 
7698 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7699 
7700 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7701 									stream * cq_ptr_lo_off;
7702 
7703 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7704 
7705 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7706 
7707 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7708 	size = RREG32(cq_tsize);
7709 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7710 		stream, cq_ptr, size);
7711 }
7712 
7713 /**
7714  * gaudi2_print_last_pqes_on_err - print last PQEs on error
7715  *
7716  * @hdev: pointer to the habanalabs device structure
7717  * @qid_base: first QID of the QMAN (out of 4 streams)
7718  * @stream: the QMAN's stream
7719  * @qman_base: base address of QMAN registers block
7720  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7721  */
7722 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7723 						u64 qman_base, bool pr_sw_conf)
7724 {
7725 	u32 ci, qm_ci_stream_off;
7726 	struct hl_hw_queue *q;
7727 	u64 pq_ci;
7728 	int i;
7729 
7730 	q = &hdev->kernel_queues[qid_base + stream];
7731 
7732 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7733 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7734 						stream * qm_ci_stream_off;
7735 
7736 	hdev->asic_funcs->hw_queues_lock(hdev);
7737 
7738 	if (pr_sw_conf)
7739 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7740 
7741 	ci = RREG32(pq_ci);
7742 
	/* we should start printing from ci - 1 */
7744 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7745 
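	/* Dump the last PQEs, walking backwards from ci - 1 up to the PQ fetcher cache depth */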
7746 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7747 		struct hl_bd *bd;
7748 		u64 addr;
7749 		u32 len;
7750 
7751 		bd = q->kernel_address;
7752 		bd += ci;
7753 
7754 		len = le32_to_cpu(bd->len);
		/* len 0 means an uninitialized entry - break */
7756 		if (!len)
7757 			break;
7758 
7759 		addr = le64_to_cpu(bd->ptr);
7760 
7761 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7762 			stream, ci, addr, len);
7763 
7764 		/* get previous ci, wrap if needed */
7765 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7766 	}
7767 
7768 	hdev->asic_funcs->hw_queues_unlock(hdev);
7769 }
7770 
7771 /**
7772  * print_qman_data_on_err - extract QMAN data on error
7773  *
7774  * @hdev: pointer to the habanalabs device structure
7775  * @qid_base: first QID of the QMAN (out of 4 streams)
7776  * @stream: the QMAN's stream
7777  * @qman_base: base address of QMAN registers block
7778  *
 * This function attempts to extract as much data as possible on a QMAN error.
 * On an upper CP, print the SW config stream data and the last 8 PQEs.
 * On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
7782  */
7783 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7784 {
7785 	u32 i;
7786 
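	/* stream == QMAN_STREAMS denotes the lower CP */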
7787 	if (stream != QMAN_STREAMS) {
7788 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7789 		return;
7790 	}
7791 
7792 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7793 
7794 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7795 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7796 }
7797 
7798 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7799 							u64 qman_base, u32 qid_base)
7800 {
7801 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7802 	u64 glbl_sts_addr, arb_err_addr;
7803 	char reg_desc[32];
7804 
7805 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7806 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7807 
7808 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7809 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7810 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7811 
7812 		if (!glbl_sts_val)
7813 			continue;
7814 
7815 		if (i == QMAN_STREAMS) {
7816 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7817 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7818 		} else {
7819 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7820 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7821 		}
7822 
7823 		for (j = 0 ; j < num_error_causes ; j++)
7824 			if (glbl_sts_val & BIT(j)) {
7825 				gaudi2_print_event(hdev, event_type, true,
7826 					"%s. err cause: %s", reg_desc,
7827 					i == QMAN_STREAMS ?
7828 					gaudi2_qman_lower_cp_error_cause[j] :
7829 					gaudi2_qman_error_cause[j]);
7830 				error_count++;
7831 			}
7832 
7833 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7834 	}
7835 
7836 	arb_err_val = RREG32(arb_err_addr);
7837 
7838 	if (!arb_err_val)
7839 		goto out;
7840 
7841 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7842 		if (arb_err_val & BIT(j)) {
7843 			gaudi2_print_event(hdev, event_type, true,
7844 				"ARB_ERR. err cause: %s",
7845 				gaudi2_qman_arb_error_cause[j]);
7846 			error_count++;
7847 		}
7848 	}
7849 
7850 out:
7851 	return error_count;
7852 }
7853 
7854 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7855 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7856 			enum gaudi2_engine_id id, u64 *event_mask)
7857 {
7858 	u32 razwi_hi, razwi_lo, razwi_xy;
7859 	u16 eng_id = id;
7860 	u8 rd_wr_flag;
7861 
7862 	if (is_write) {
7863 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7864 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7865 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7866 		rd_wr_flag = HL_RAZWI_WRITE;
7867 	} else {
7868 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7869 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7870 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7871 		rd_wr_flag = HL_RAZWI_READ;
7872 	}
7873 
7874 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7875 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7876 
7877 	dev_err_ratelimited(hdev->dev,
7878 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7879 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7880 }
7881 
7882 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7883 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7884 			enum gaudi2_engine_id id, u64 *event_mask)
7885 {
7886 	u64 razwi_addr = CFG_BASE;
7887 	u32 razwi_xy;
7888 	u16 eng_id = id;
7889 	u8 rd_wr_flag;
7890 
7891 	if (is_write) {
7892 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7893 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7894 		rd_wr_flag = HL_RAZWI_WRITE;
7895 	} else {
7896 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7897 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7898 		rd_wr_flag = HL_RAZWI_READ;
7899 	}
7900 
7901 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7902 	dev_err_ratelimited(hdev->dev,
7903 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7904 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7905 						razwi_xy);
7906 }
7907 
7908 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7909 						enum razwi_event_sources module, u8 module_idx)
7910 {
7911 	switch (module) {
7912 	case RAZWI_TPC:
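		/* The last TPC (TPC24) is enumerated as DCORE0 TPC6 */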
7913 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7914 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7915 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7916 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7917 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7918 
7919 	case RAZWI_MME:
7920 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7921 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7922 
7923 	case RAZWI_EDMA:
7924 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7925 			(module_idx % NUM_OF_EDMA_PER_DCORE));
7926 
7927 	case RAZWI_PDMA:
7928 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
7929 
7930 	case RAZWI_NIC:
7931 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
7932 
7933 	case RAZWI_DEC:
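		/* Module indices 8 and 9 are the two shared PCIe decoders */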
7934 		if (module_idx == 8)
7935 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
7936 
7937 		if (module_idx == 9)
7938 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
7939 					;
7940 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7941 				(module_idx % NUM_OF_DEC_PER_DCORE) +
7942 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7943 
7944 	case RAZWI_ROT:
7945 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
7946 
7947 	default:
7948 		return GAUDI2_ENGINE_ID_SIZE;
7949 	}
7950 }
7951 
7952 /*
7953  * This function handles RR(Range register) hit events.
7954  * raised be initiators not PSOC RAZWI.
7955  */
7956 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
7957 				enum razwi_event_sources module, u8 module_idx,
7958 				u8 module_sub_idx, u64 *event_mask)
7959 {
7960 	bool via_sft = false;
7961 	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
7962 	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
7963 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
7964 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
7965 	char initiator_name[64];
7966 
7967 	switch (module) {
7968 	case RAZWI_TPC:
7969 		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
7970 
7971 		if (hl_is_fw_ver_below_1_9(hdev) &&
7972 				!hdev->asic_prop.fw_security_enabled &&
7973 				((module_idx == 0) || (module_idx == 1)))
7974 			lbw_rtr_id = DCORE0_RTR0;
7975 		else
7976 			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
7977 		sprintf(initiator_name, "TPC_%u", module_idx);
7978 		break;
7979 	case RAZWI_MME:
7980 		sprintf(initiator_name, "MME_%u", module_idx);
7981 		switch (module_sub_idx) {
7982 		case MME_WAP0:
7983 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
7984 			break;
7985 		case MME_WAP1:
7986 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
7987 			break;
7988 		case MME_WRITE:
7989 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
7990 			break;
7991 		case MME_READ:
7992 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
7993 			break;
7994 		case MME_SBTE0:
7995 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
7996 			break;
7997 		case MME_SBTE1:
7998 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
7999 			break;
8000 		case MME_SBTE2:
8001 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8002 			break;
8003 		case MME_SBTE3:
8004 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8005 			break;
8006 		case MME_SBTE4:
8007 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8008 			break;
8009 		default:
8010 			return;
8011 		}
8012 		lbw_rtr_id = hbw_rtr_id;
8013 		break;
8014 	case RAZWI_EDMA:
8015 		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8016 		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
		/* SFT has a separate MSTR_IF for LBW; only there can we
		 * read the LBW RAZWI related registers
		 */
8020 		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8021 								dcore_id * SFT_DCORE_OFFSET;
8022 		via_sft = true;
8023 		sprintf(initiator_name, "EDMA_%u", module_idx);
8024 		break;
8025 	case RAZWI_PDMA:
8026 		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8027 		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8028 		sprintf(initiator_name, "PDMA_%u", module_idx);
8029 		break;
8030 	case RAZWI_NIC:
8031 		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8032 		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8033 		sprintf(initiator_name, "NIC_%u", module_idx);
8034 		break;
8035 	case RAZWI_DEC:
8036 		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8037 		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8038 		sprintf(initiator_name, "DEC_%u", module_idx);
8039 		break;
8040 	case RAZWI_ROT:
8041 		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8042 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8043 		sprintf(initiator_name, "ROT_%u", module_idx);
8044 		break;
8045 	default:
8046 		return;
8047 	}
8048 
8049 	/* Find router mstr_if register base */
8050 	if (!via_sft) {
8051 		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8052 		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8053 		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8054 				dcore_id * DCORE_OFFSET +
8055 				dcore_rtr_id * DCORE_RTR_OFFSET +
8056 				RTR_MSTR_IF_OFFSET;
8057 		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8058 				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8059 	}
8060 
8061 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
8062 	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8063 	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8064 	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8065 	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8066 
8067 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8068 	if (hbw_shrd_aw) {
8069 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8070 						initiator_name, eng_id, event_mask);
8071 
8072 		/* Clear event indication */
8073 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8074 	}
8075 
8076 	if (hbw_shrd_ar) {
8077 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8078 						initiator_name, eng_id, event_mask);
8079 
8080 		/* Clear event indication */
8081 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8082 	}
8083 
8084 	if (lbw_shrd_aw) {
8085 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8086 						initiator_name, eng_id, event_mask);
8087 
8088 		/* Clear event indication */
8089 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8090 	}
8091 
8092 	if (lbw_shrd_ar) {
8093 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8094 						initiator_name, eng_id, event_mask);
8095 
8096 		/* Clear event indication */
8097 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8098 	}
8099 }
8100 
8101 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8102 {
8103 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8104 	u8 mod_idx, sub_mod;
8105 
8106 	/* check all TPCs */
8107 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8108 		if (prop->tpc_enabled_mask & BIT(mod_idx))
8109 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8110 	}
8111 
8112 	/* check all MMEs */
8113 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8114 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8115 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8116 									sub_mod, NULL);
8117 
8118 	/* check all EDMAs */
8119 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8120 		if (prop->edma_enabled_mask & BIT(mod_idx))
8121 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8122 
8123 	/* check all PDMAs */
8124 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8125 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8126 
	/* check all NICs - both ports of a NIC macro share a single initiator, hence mod_idx >> 1 */
8128 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8129 		if (hdev->nic_ports_mask & BIT(mod_idx))
8130 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8131 								NULL);
8132 
8133 	/* check all DECs */
8134 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8135 		if (prop->decoder_enabled_mask & BIT(mod_idx))
8136 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8137 
8138 	/* check all ROTs */
8139 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8140 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8141 }
8142 
8143 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8144 						u32 axuser_xy, u32 *base, u16 *eng_id,
8145 						char *eng_name)
8146 {
8147 
8148 	int i, num_of_eng = 0;
8149 	u16 str_size = 0;
8150 
8151 	for (i = 0 ; i < array_size ; i++) {
8152 		if (axuser_xy != razwi_info[i].axuser_xy)
8153 			continue;
8154 
8155 		eng_id[num_of_eng] = razwi_info[i].eng_id;
8156 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
8157 		if (!num_of_eng)
8158 			str_size += snprintf(eng_name + str_size,
8159 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8160 						razwi_info[i].eng_name);
8161 		else
8162 			str_size += snprintf(eng_name + str_size,
8163 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8164 						razwi_info[i].eng_name);
8165 		num_of_eng++;
8166 	}
8167 
8168 	return num_of_eng;
8169 }
8170 
8171 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8172 						u64 *event_mask)
8173 {
8174 	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8175 	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8176 	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8177 	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8178 	bool razwi_happened = false;
8179 	int i;
8180 
8181 	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8182 							axuser_xy, base, eng_id, eng_name_str);
8183 
8184 	/* If no match for XY coordinates, try to find it in MME razwi table */
8185 	if (!num_of_eng) {
8186 		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8187 		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8188 								ARRAY_SIZE(mme_razwi_info),
8189 								axuser_xy, base, eng_id,
8190 								eng_name_str);
8191 	}
8192 
	for (i = 0 ; i < num_of_eng ; i++) {
8194 		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8195 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8196 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8197 			dev_err(hdev->dev,
8198 					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8199 					eng_name_str, ((u64)addr_hi << 32) + addr_lo);
8200 			hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0],
8201 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8202 			razwi_happened = true;
8203 		}
8204 
8205 		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8206 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8207 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8208 			dev_err(hdev->dev,
8209 					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8210 					eng_name_str, ((u64)addr_hi << 32) + addr_lo);
8211 			hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0],
8212 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8213 			razwi_happened = true;
8214 		}
8215 
8216 		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8217 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8218 			dev_err(hdev->dev,
8219 					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8220 					eng_name_str, addr_lo);
8221 			hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8222 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8223 			razwi_happened = true;
8224 		}
8225 
8226 		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8227 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8228 			dev_err(hdev->dev,
8229 					"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8230 					eng_name_str, addr_lo);
8231 			hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8232 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8233 			razwi_happened = true;
8234 		}
		/* In the common case the loop will break when there is only one engine id, or
		 * several engines sharing the same router. The exception is a PSOC RAZWI from
		 * EDMA, where it's possible to get an axuser id that matches 2 routers (the 2
		 * interfaces of the SFT router). In that case the first router may not hold the
		 * info and we need to iterate over the other router as well.
		 */
8241 		if (razwi_happened)
8242 			break;
8243 	}
8244 
8245 	return razwi_happened;
8246 }
8247 
8248 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8249 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8250 {
8251 	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8252 
8253 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8254 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8255 		if (!razwi_intr)
8256 			return 0;
8257 	}
8258 
8259 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8260 
8261 	dev_err_ratelimited(hdev->dev,
8262 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8263 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8264 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8265 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8266 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8267 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8268 
8269 	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8270 		error_count++;
8271 	else
8272 		dev_err_ratelimited(hdev->dev,
8273 				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8274 				razwi_mask_info);
8275 
8276 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8277 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8278 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8279 
8280 	return error_count;
8281 }
8282 
8283 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8284 {
8285 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8286 
8287 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8288 
8289 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8290 		if (sts_val & BIT(i)) {
8291 			gaudi2_print_event(hdev, event_type, true,
8292 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
8293 			sts_clr_val |= BIT(i);
8294 			error_count++;
8295 		}
8296 	}
8297 
8298 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8299 
8300 	return error_count;
8301 }
8302 
8303 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8304 					bool extended_err_check, u64 *event_mask)
8305 {
8306 	enum razwi_event_sources module;
8307 	u32 error_count = 0;
8308 	u64 qman_base;
8309 	u8 index;
8310 
8311 	switch (event_type) {
8312 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8313 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8314 		qman_base = mmDCORE0_TPC0_QM_BASE +
8315 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8316 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8317 		module = RAZWI_TPC;
8318 		break;
8319 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8320 		qman_base = mmDCORE0_TPC6_QM_BASE;
8321 		module = RAZWI_TPC;
8322 		break;
8323 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8324 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8325 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8326 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
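		/* MME events are evenly spaced; derive the MME index from the event offset */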
8327 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8328 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8329 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8330 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8331 		module = RAZWI_MME;
8332 		break;
8333 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8334 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8335 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8336 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8337 		module = RAZWI_PDMA;
8338 		break;
8339 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8340 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8341 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8342 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8343 		module = RAZWI_ROT;
8344 		break;
8345 	default:
8346 		return 0;
8347 	}
8348 
8349 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8350 
	/* There is a single event per NIC macro, so we should check both of its QMAN blocks */
8352 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8353 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8354 		error_count += _gaudi2_handle_qm_sei_err(hdev,
8355 					qman_base + NIC_QM_OFFSET, event_type);
8356 
8357 	if (extended_err_check) {
8358 		/* check if RAZWI happened */
8359 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8360 		hl_check_for_glbl_errors(hdev);
8361 	}
8362 
8363 	return error_count;
8364 }
8365 
8366 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8367 {
8368 	u32 qid_base, error_count = 0;
8369 	u64 qman_base;
8370 	u8 index = 0;
8371 
8372 	switch (event_type) {
8373 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8374 		index = event_type - GAUDI2_EVENT_TPC0_QM;
8375 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8376 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8377 		break;
8378 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8379 		index = event_type - GAUDI2_EVENT_TPC6_QM;
8380 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8381 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8382 		break;
8383 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8384 		index = event_type - GAUDI2_EVENT_TPC12_QM;
8385 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8386 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8387 		break;
8388 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8389 		index = event_type - GAUDI2_EVENT_TPC18_QM;
8390 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8391 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8392 		break;
8393 	case GAUDI2_EVENT_TPC24_QM:
8394 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8395 		qman_base = mmDCORE0_TPC6_QM_BASE;
8396 		break;
8397 	case GAUDI2_EVENT_MME0_QM:
8398 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8399 		qman_base = mmDCORE0_MME_QM_BASE;
8400 		break;
8401 	case GAUDI2_EVENT_MME1_QM:
8402 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8403 		qman_base = mmDCORE1_MME_QM_BASE;
8404 		break;
8405 	case GAUDI2_EVENT_MME2_QM:
8406 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8407 		qman_base = mmDCORE2_MME_QM_BASE;
8408 		break;
8409 	case GAUDI2_EVENT_MME3_QM:
8410 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8411 		qman_base = mmDCORE3_MME_QM_BASE;
8412 		break;
8413 	case GAUDI2_EVENT_HDMA0_QM:
8414 		index = 0;
8415 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8416 		qman_base = mmDCORE0_EDMA0_QM_BASE;
8417 		break;
8418 	case GAUDI2_EVENT_HDMA1_QM:
8419 		index = 1;
8420 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8421 		qman_base = mmDCORE0_EDMA1_QM_BASE;
8422 		break;
8423 	case GAUDI2_EVENT_HDMA2_QM:
8424 		index = 2;
8425 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8426 		qman_base = mmDCORE1_EDMA0_QM_BASE;
8427 		break;
8428 	case GAUDI2_EVENT_HDMA3_QM:
8429 		index = 3;
8430 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8431 		qman_base = mmDCORE1_EDMA1_QM_BASE;
8432 		break;
8433 	case GAUDI2_EVENT_HDMA4_QM:
8434 		index = 4;
8435 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8436 		qman_base = mmDCORE2_EDMA0_QM_BASE;
8437 		break;
8438 	case GAUDI2_EVENT_HDMA5_QM:
8439 		index = 5;
8440 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8441 		qman_base = mmDCORE2_EDMA1_QM_BASE;
8442 		break;
8443 	case GAUDI2_EVENT_HDMA6_QM:
8444 		index = 6;
8445 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8446 		qman_base = mmDCORE3_EDMA0_QM_BASE;
8447 		break;
8448 	case GAUDI2_EVENT_HDMA7_QM:
8449 		index = 7;
8450 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8451 		qman_base = mmDCORE3_EDMA1_QM_BASE;
8452 		break;
8453 	case GAUDI2_EVENT_PDMA0_QM:
8454 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8455 		qman_base = mmPDMA0_QM_BASE;
8456 		break;
8457 	case GAUDI2_EVENT_PDMA1_QM:
8458 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8459 		qman_base = mmPDMA1_QM_BASE;
8460 		break;
8461 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8462 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8463 		qman_base = mmROT0_QM_BASE;
8464 		break;
8465 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8466 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8467 		qman_base = mmROT1_QM_BASE;
8468 		break;
8469 	default:
8470 		return 0;
8471 	}
8472 
8473 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8474 
8475 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8476 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8477 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8478 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8479 	}
8480 
8481 	hl_check_for_glbl_errors(hdev);
8482 
8483 	return error_count;
8484 }
8485 
8486 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8487 {
8488 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8489 
8490 	sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS);
8491 
8492 	for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8493 		if (sts_val & BIT(i)) {
8494 			gaudi2_print_event(hdev, event_type, true,
8495 				"err cause: %s", gaudi2_arc_sei_error_cause[i]);
8496 			sts_clr_val |= BIT(i);
8497 			error_count++;
8498 		}
8499 	}
8500 
8501 	hl_check_for_glbl_errors(hdev);
8502 
8503 	WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR, sts_clr_val);
8504 
8505 	return error_count;
8506 }
8507 
8508 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8509 {
8510 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8511 
8512 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8513 
8514 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8515 		if (sts_val & BIT(i)) {
8516 			gaudi2_print_event(hdev, event_type, true,
8517 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8518 			sts_clr_val |= BIT(i);
8519 			error_count++;
8520 		}
8521 	}
8522 
8523 	hl_check_for_glbl_errors(hdev);
8524 
8525 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8526 
8527 	return error_count;
8528 }
8529 
8530 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8531 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8532 					u64 *event_mask)
8533 {
8534 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8535 	u32 error_count = 0;
8536 	int i;
8537 
8538 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8539 		if (intr_cause_data & BIT(i)) {
8540 			gaudi2_print_event(hdev, event_type, true,
8541 				"err cause: %s", guadi2_rot_error_cause[i]);
8542 			error_count++;
8543 		}
8544 
8545 	/* check if RAZWI happened */
8546 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8547 	hl_check_for_glbl_errors(hdev);
8548 
8549 	return error_count;
8550 }
8551 
static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8553 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8554 					u64 *event_mask)
8555 {
8556 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8557 	u32 error_count = 0;
8558 	int i;
8559 
8560 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8561 		if (intr_cause_data & BIT(i)) {
8562 			gaudi2_print_event(hdev, event_type, true,
8563 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8564 			error_count++;
8565 		}
8566 
8567 	/* check if RAZWI happened */
8568 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8569 	hl_check_for_glbl_errors(hdev);
8570 
8571 	return error_count;
8572 }
8573 
8574 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8575 					u64 *event_mask)
8576 {
8577 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8578 	int i;
8579 
8580 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8581 		/* DCORE DEC */
8582 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8583 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8584 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8585 	else
8586 		/* PCIE DEC */
8587 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8588 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8589 
8590 	sts_val = RREG32(sts_addr);
8591 
8592 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8593 		if (sts_val & BIT(i)) {
8594 			gaudi2_print_event(hdev, event_type, true,
8595 				"err cause: %s", gaudi2_dec_error_cause[i]);
8596 			sts_clr_val |= BIT(i);
8597 			error_count++;
8598 		}
8599 	}
8600 
8601 	/* check if RAZWI happened */
8602 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8603 	hl_check_for_glbl_errors(hdev);
8604 
	/* Write 1 to clear errors */
8606 	WREG32(sts_addr, sts_clr_val);
8607 
8608 	return error_count;
8609 }
8610 
8611 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8612 					u64 *event_mask)
8613 {
8614 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8615 	int i;
8616 
8617 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8618 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8619 
8620 	sts_val = RREG32(sts_addr);
8621 
8622 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8623 		if (sts_val & BIT(i)) {
8624 			gaudi2_print_event(hdev, event_type, true,
8625 				"err cause: %s", guadi2_mme_error_cause[i]);
8626 			sts_clr_val |= BIT(i);
8627 			error_count++;
8628 		}
8629 	}
8630 
8631 	/* check if RAZWI happened */
8632 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8633 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8634 
8635 	hl_check_for_glbl_errors(hdev);
8636 
8637 	WREG32(sts_clr_addr, sts_clr_val);
8638 
8639 	return error_count;
8640 }
8641 
8642 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8643 					u64 intr_cause_data)
8644 {
8645 	int i, error_count = 0;
8646 
8647 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8648 		if (intr_cause_data & BIT(i)) {
8649 			gaudi2_print_event(hdev, event_type, true,
8650 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8651 			error_count++;
8652 		}
8653 
8654 	hl_check_for_glbl_errors(hdev);
8655 
8656 	return error_count;
8657 }
8658 
8659 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8660 					u64 *event_mask)
8661 {
8662 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8663 	int i;
8664 
8665 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8666 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8667 
8668 	sts_val = RREG32(sts_addr);
8669 
8670 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8671 		if (sts_val & BIT(i)) {
8672 			gaudi2_print_event(hdev, event_type, true,
8673 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8674 			sts_clr_val |= BIT(i);
8675 			error_count++;
8676 		}
8677 	}
8678 
8679 	/* check if RAZWI happened on WAP0/1 */
8680 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8681 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8682 	hl_check_for_glbl_errors(hdev);
8683 
8684 	WREG32(sts_clr_addr, sts_clr_val);
8685 
8686 	return error_count;
8687 }
8688 
8689 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8690 					u64 intr_cause_data)
8691 {
8692 	u32 error_count = 0;
8693 	int i;
8694 
	/* If an AXI read or write error is received, an error is reported and an
	 * interrupt message is sent. Due to a HW erratum, when reading the cause
	 * register of the KDMA engine, the reported error is always HBW even if
	 * the actual error was caused by a LBW KDMA transaction.
	 */
8700 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8701 		if (intr_cause_data & BIT(i)) {
8702 			gaudi2_print_event(hdev, event_type, true,
8703 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8704 			error_count++;
8705 		}
8706 
8707 	hl_check_for_glbl_errors(hdev);
8708 
8709 	return error_count;
8710 }
8711 
8712 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr)
8713 {
8714 	u32 error_count = 0, sts_val = RREG32(sts_addr);
8715 	int i;
8716 
8717 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8718 		if (sts_val & BIT(i)) {
8719 			gaudi2_print_event(hdev, event_type, true,
8720 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8721 			error_count++;
8722 		}
8723 
8724 	hl_check_for_glbl_errors(hdev);
8725 
8726 	return error_count;
8727 }
8728 
8729 static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx)
8730 {
8731 	u32 sts_addr;
8732 
8733 	sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET;
8734 	return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
8735 }
8736 
8737 static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx)
8738 {
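	/*
	 * The EDMA core events are not enumerated in the same order as the
	 * physical EDMA instances, so translate the event index to the
	 * DCORE/engine layout before computing the register offset below.
	 */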
8739 	static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5};
8740 	u32 sts_addr, index;
8741 
8742 	index = edma_event_index_map[edma_idx];
8743 
8744 	sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE +
8745 				DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) +
8746 				DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE);
8747 	return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
8748 }
8749 
8750 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8751 {
8752 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8753 
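	/*
	 * Check all four RAZWI indications of the PCIE master interface
	 * (HBW/LBW x write/read). For each indication that fired, print the
	 * captured info and write 1 back to clear it.
	 */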
8754 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8755 	if (RREG32(razwi_happened_addr)) {
8756 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8757 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8758 		WREG32(razwi_happened_addr, 0x1);
8759 	}
8760 
8761 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8762 	if (RREG32(razwi_happened_addr)) {
8763 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8764 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8765 		WREG32(razwi_happened_addr, 0x1);
8766 	}
8767 
8768 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8769 	if (RREG32(razwi_happened_addr)) {
8770 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8771 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8772 		WREG32(razwi_happened_addr, 0x1);
8773 	}
8774 
8775 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8776 	if (RREG32(razwi_happened_addr)) {
8777 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8778 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8779 		WREG32(razwi_happened_addr, 0x1);
8780 	}
8781 }
8782 
8783 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8784 					u64 intr_cause_data, u64 *event_mask)
8785 {
8786 	u32 error_count = 0;
8787 	int i;
8788 
8789 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8790 		if (!(intr_cause_data & BIT_ULL(i)))
8791 			continue;
8792 
8793 		gaudi2_print_event(hdev, event_type, true,
8794 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8795 		error_count++;
8796 
8797 		switch (intr_cause_data & BIT_ULL(i)) {
8798 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8799 			hl_check_for_glbl_errors(hdev);
8800 			break;
8801 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8802 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8803 			break;
8804 		}
8805 	}
8806 
8807 	return error_count;
8808 }
8809 
8810 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8811 				u64 intr_cause_data)
8813 {
8814 	u32 error_count = 0;
8815 	int i;
8816 
8817 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8818 		if (intr_cause_data & BIT_ULL(i)) {
8819 			gaudi2_print_event(hdev, event_type, true,
8820 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8821 			error_count++;
8822 		}
8823 	}
8824 
8825 	return error_count;
8826 }
8827 
8828 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8829 {
8830 	u32 error_count = 0;
8831 	int i;
8832 
8833 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8834 		if (intr_cause_data & BIT_ULL(i)) {
8835 			gaudi2_print_event(hdev, event_type, true,
8836 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8837 			error_count++;
8838 		}
8839 	}
8840 
8841 	return error_count;
8842 }
8843 
8844 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8845 					u64 *event_mask)
8846 {
8847 	u32 valid, val, axid_l, axid_h;
8848 	u64 addr;
8849 
8850 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8851 
8852 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8853 		return;
8854 
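	/* The faulting VA is split across two registers: its upper 32 bits are
	 * captured in PAGE_ERROR_CAPTURE and its lower 32 bits in
	 * PAGE_ERROR_CAPTURE_VA.
	 */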
8855 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8856 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8857 	addr <<= 32;
8858 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8859 
8860 	axid_l = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_LSB));
8861 	axid_h = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_FAULT_ID_MSB));
8862 
8863 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx, transaction id 0x%llX\n",
8864 				is_pmmu ? "PMMU" : "HMMU", addr, ((u64)axid_h << 32) + axid_l);
8865 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8866 
8867 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8868 }
8869 
8870 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8871 {
8872 	u32 valid, val;
8873 	u64 addr;
8874 
8875 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8876 
8877 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8878 		return;
8879 
8880 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8881 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8882 	addr <<= 32;
8883 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8884 
8885 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8886 				is_pmmu ? "PMMU" : "HMMU", addr);
8887 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE), 0);
8888 }
8889 
8890 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8891 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8892 {
8893 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8894 	int i;
8895 
8896 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8897 
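	/* The first two cause bits carry extra capture registers: bit 0 is a
	 * page fault and bit 1 is an access error, each decoded by its
	 * dedicated handler before the cause is cleared below.
	 */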
8898 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8899 		if (spi_sei_cause & BIT(i)) {
8900 			gaudi2_print_event(hdev, event_type, true,
8901 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8902 
8903 			if (i == 0)
8904 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8905 			else if (i == 1)
8906 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8907 
8908 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8909 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8910 
8911 			error_count++;
8912 		}
8913 	}
8914 
8915 	/* Clear cause */
8916 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8917 
8918 	/* Clear interrupt */
8919 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8920 
8921 	return error_count;
8922 }
8923 
8924 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
8925 {
8926 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
8927 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
8928 	int i;
8929 
8930 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
8931 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
8932 
8933 	sei_cause_val = RREG32(sei_cause_addr);
8934 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
8935 	cq_intr_val = RREG32(cq_intr_addr);
8936 
8937 	/* SEI interrupt */
8938 	if (sei_cause_cause) {
8939 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
8940 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
8941 					sei_cause_val);
8942 
8943 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
8944 			if (!(sei_cause_cause & BIT(i)))
8945 				continue;
8946 
8947 			gaudi2_print_event(hdev, event_type, true,
8948 				"err cause: %s. %s: 0x%X",
8949 				gaudi2_sm_sei_cause[i].cause_name,
8950 				gaudi2_sm_sei_cause[i].log_name,
8951 				sei_cause_log);
8952 			error_count++;
8953 			break;
8954 		}
8955 
8956 		/* Clear SM_SEI_CAUSE */
8957 		WREG32(sei_cause_addr, 0);
8958 	}
8959 
8960 	/* CQ interrupt */
8961 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
8962 		cq_intr_queue_index =
8963 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
8964 					cq_intr_val);
8965 
8966 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
8967 				sm_index, cq_intr_queue_index);
8968 		error_count++;
8969 
8970 		/* Clear CQ_INTR */
8971 		WREG32(cq_intr_addr, 0);
8972 	}
8973 
8974 	hl_check_for_glbl_errors(hdev);
8975 
8976 	return error_count;
8977 }
8978 
8979 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8980 {
8981 	bool is_pmmu = false;
8982 	u32 error_count = 0;
8983 	u64 mmu_base;
8984 	u8 index;
8985 
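	/* Each HMMU instance has three consecutive SPI/SEI events (from
	 * PAGE_FAULT_OR_WR_PERM up to SECURITY_ERROR), so the event offset is
	 * divided by 3 to recover the HMMU index within its DCORE. The AXI
	 * error response events are enumerated one per HMMU and map directly.
	 */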
8986 	switch (event_type) {
8987 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
8988 		index = (event_type - GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM) / 3;
8989 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8990 		break;
8991 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
8992 		index = (event_type - GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP);
8993 		mmu_base = mmDCORE0_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8994 		break;
8995 	case GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
8996 		index = (event_type - GAUDI2_EVENT_HMMU8_PAGE_FAULT_WR_PERM) / 3;
8997 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
8998 		break;
8999 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9000 		index = (event_type - GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP);
9001 		mmu_base = mmDCORE1_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
9002 		break;
9003 	case GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9004 		index = (event_type - GAUDI2_EVENT_HMMU7_PAGE_FAULT_WR_PERM) / 3;
9005 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
9006 		break;
9007 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9008 		index = (event_type - GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP);
9009 		mmu_base = mmDCORE2_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
9010 		break;
9011 	case GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9012 		index = (event_type - GAUDI2_EVENT_HMMU15_PAGE_FAULT_WR_PERM) / 3;
9013 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
9014 		break;
9015 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9016 		index = (event_type - GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP);
9017 		mmu_base = mmDCORE3_HMMU0_MMU_BASE + index * DCORE_HMMU_OFFSET;
9018 		break;
9019 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9020 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9021 		is_pmmu = true;
9022 		mmu_base = mmPMMU_HBW_MMU_BASE;
9023 		break;
9024 	default:
9025 		return 0;
9026 	}
9027 
9028 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9029 							is_pmmu, event_mask);
9030 	hl_check_for_glbl_errors(hdev);
9031 
9032 	return error_count;
9033 }
9034 
9035 
9036 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9037 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9038 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9039 {
9040 	u32 addr, beat, beat_shift;
9041 	bool rc = false;
9042 
9043 	dev_err_ratelimited(hdev->dev,
9044 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9045 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9046 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9047 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9048 
9049 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9050 	dev_err_ratelimited(hdev->dev,
9051 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9052 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9053 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9054 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9055 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9056 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9057 
9058 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
9059 	for (beat = 0 ; beat < 4 ; beat++) {
9060 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9061 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9062 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9063 						beat,
9064 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9065 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9066 
9067 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9068 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9069 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9070 						beat,
9071 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9072 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9073 			rc |= true;
9074 		}
9075 
9076 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9077 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9078 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9079 			dev_err_ratelimited(hdev->dev,
9080 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9081 					beat,
9082 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9083 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9084 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9085 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9086 			rc |= true;
9087 		}
9088 
9089 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9090 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9091 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9092 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9093 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9094 	}
9095 
9096 	return rc;
9097 }
9098 
9099 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9100 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9101 {
9102 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9103 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9104 
9105 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9106 
9107 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9108 				derr & 0x3, derr & 0xc);
9109 
9110 	/* JIRA H6-3286 - the following prints may not be valid */
9111 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9112 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9113 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9114 		dev_err_ratelimited(hdev->dev,
9115 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9116 				i,
9117 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9118 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9119 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9120 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9121 	}
9122 }
9123 
9124 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9125 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9126 {
9127 	__le32 *col_cmd = ca_par_err_data->dbg_col;
9128 	__le16 *row_cmd = ca_par_err_data->dbg_row;
9129 	u32 i;
9130 
9131 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9132 
9133 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9134 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9135 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9136 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9137 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9138 }
9139 
9140 /* Returns true if hard reset is needed or false otherwise */
9141 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9142 					struct hl_eq_hbm_sei_data *sei_data)
9143 {
9144 	bool require_hard_reset = false;
9145 	u32 hbm_id, mc_id, cause_idx;
9146 
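	/* Event enumeration: each HBM has two MCs, and each MC has a severe and
	 * a non-severe SEI event, so the HBM index is the event offset divided
	 * by 4 and the MC index is (offset / 2) % 2.
	 */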
9147 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9148 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9149 
9150 	cause_idx = sei_data->hdr.sei_cause;
9151 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9152 		gaudi2_print_event(hdev, event_type, true,
9153 			"err cause: Invalid HBM SEI event cause (%d) provided by FW",
9154 			cause_idx);
9155 		return true;
9156 	}
9157 
9158 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9159 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9160 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9161 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9162 		hbm_mc_sei_cause[cause_idx]);
9163 
9164 	/* Print error-specific info */
9165 	switch (cause_idx) {
9166 	case HBM_SEI_CATTRIP:
9167 		require_hard_reset = true;
9168 		break;
9169 
9170 	case  HBM_SEI_CMD_PARITY_EVEN:
9171 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9172 						le32_to_cpu(sei_data->hdr.cnt));
9173 		require_hard_reset = true;
9174 		break;
9175 
9176 	case  HBM_SEI_CMD_PARITY_ODD:
9177 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9178 						le32_to_cpu(sei_data->hdr.cnt));
9179 		require_hard_reset = true;
9180 		break;
9181 
9182 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9183 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9184 						le32_to_cpu(sei_data->hdr.cnt));
9185 		require_hard_reset = true;
9186 		break;
9187 
9188 	case HBM_SEI_READ_ERR:
9189 		/* Unlike other SEI events, read error requires further processing of the
9190 		 * raw data in order to determine the root cause.
9191 		 */
9192 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9193 								&sei_data->read_err_info,
9194 								le32_to_cpu(sei_data->hdr.cnt));
9195 		break;
9196 
9197 	default:
9198 		break;
9199 	}
9200 
9201 	require_hard_reset |= !!sei_data->hdr.is_critical;
9202 
9203 	return require_hard_reset;
9204 }
9205 
9206 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9207 				u64 intr_cause_data)
9208 {
9209 	if (intr_cause_data) {
9210 		gaudi2_print_event(hdev, event_type, true,
9211 			"temperature error cause: %#llx", intr_cause_data);
9212 		return 1;
9213 	}
9214 
9215 	return 0;
9216 }
9217 
9218 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9219 {
9220 	u32 i, error_count = 0;
9221 
9222 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9223 		if (intr_cause_data & hbm_mc_spi[i].mask) {
9224 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9225 				hbm_mc_spi[i].cause);
9226 			error_count++;
9227 		}
9228 
9229 	return error_count;
9230 }
9231 
9232 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9233 {
9234 	ktime_t zero_time = ktime_set(0, 0);
9235 
9236 	mutex_lock(&hdev->clk_throttling.lock);
9237 
9238 	switch (event_type) {
9239 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9240 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9241 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9242 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9243 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9244 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9245 		break;
9246 
9247 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9248 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9249 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9250 		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
9251 		break;
9252 
9253 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9254 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9255 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9256 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9257 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9258 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9259 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9260 		break;
9261 
9262 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9263 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9264 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9265 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9266 		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
9267 		break;
9268 
9269 	default:
9270 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9271 		break;
9272 	}
9273 
9274 	mutex_unlock(&hdev->clk_throttling.lock);
9275 }
9276 
9277 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9278 					struct cpucp_pkt_sync_err *sync_err)
9279 {
9280 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9281 
9282 	gaudi2_print_event(hdev, event_type, false,
9283 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9284 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9285 		q->pi, atomic_read(&q->ci));
9286 }
9287 
9288 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9289 {
9290 	u32 p2p_intr, msix_gw_intr, error_count = 0;
9291 
9292 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9293 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9294 
9295 	if (p2p_intr) {
9296 		gaudi2_print_event(hdev, event_type, true,
9297 			"pcie p2p transaction terminated due to security, req_id(0x%x)",
9298 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9299 
9300 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9301 		error_count++;
9302 	}
9303 
9304 	if (msix_gw_intr) {
9305 		gaudi2_print_event(hdev, event_type, true,
9306 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9307 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9308 
9309 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9310 		error_count++;
9311 	}
9312 
9313 	return error_count;
9314 }
9315 
9316 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9317 			struct hl_eq_pcie_drain_ind_data *drain_data)
9318 {
9319 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9320 
9321 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9322 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9323 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9324 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9325 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9326 
9327 	if (cause & BIT_ULL(0)) {
9328 		dev_err_ratelimited(hdev->dev,
9329 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9330 			!!lbw_rd, !!lbw_wr);
9331 		error_count++;
9332 	}
9333 
9334 	if (cause & BIT_ULL(1)) {
9335 		dev_err_ratelimited(hdev->dev,
9336 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9337 			hbw_rd, hbw_wr);
9338 		error_count++;
9339 	}
9340 
9341 	return error_count;
9342 }
9343 
9344 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9345 {
9346 	u32 error_count = 0;
9347 	int i;
9348 
9349 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9350 		if (intr_cause_data & BIT_ULL(i)) {
9351 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9352 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
9353 			error_count++;
9354 		}
9355 	}
9356 
9357 	hl_check_for_glbl_errors(hdev);
9358 
9359 	return error_count;
9360 }
9361 
9362 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9363 					struct cpucp_pkt_sync_err *sync_err)
9364 {
9365 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9366 
9367 	gaudi2_print_event(hdev, event_type, false,
9368 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9369 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9370 }
9371 
9372 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9373 					struct hl_eq_engine_arc_intr_data *data)
9374 {
9375 	struct hl_engine_arc_dccm_queue_full_irq *q;
9376 	u32 intr_type, engine_id;
9377 	u64 payload;
9378 
9379 	intr_type = le32_to_cpu(data->intr_type);
9380 	engine_id = le32_to_cpu(data->engine_id);
9381 	payload = le64_to_cpu(data->payload);
9382 
9383 	switch (intr_type) {
9384 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9385 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9386 
9387 		gaudi2_print_event(hdev, event_type, true,
9388 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9389 				engine_id, intr_type, q->queue_index);
9390 		return 1;
9391 	default:
9392 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9393 		return 0;
9394 	}
9395 }
9396 
9397 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9398 {
9399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9400 	bool reset_required = false, is_critical = false;
9401 	u32 index, ctl, reset_flags = 0, error_count = 0;
9402 	u64 event_mask = 0;
9403 	u16 event_type;
9404 
9405 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9406 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9407 
9408 	if (event_type >= GAUDI2_EVENT_SIZE) {
9409 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9410 				event_type, GAUDI2_EVENT_SIZE - 1);
9411 		return;
9412 	}
9413 
9414 	gaudi2->events_stat[event_type]++;
9415 	gaudi2->events_stat_aggregate[event_type]++;
9416 
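	/*
	 * Each handler below returns the number of error causes it printed
	 * (or GAUDI2_NA_EVENT_CAUSE for events that carry no cause data) and
	 * may update the reset flags and the user notification mask.
	 */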
9417 	switch (event_type) {
9418 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9419 		fallthrough;
9420 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9421 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9422 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9423 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9424 		is_critical = eq_entry->ecc_data.is_critical;
9425 		error_count++;
9426 		break;
9427 
9428 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9429 		fallthrough;
9430 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9431 		fallthrough;
9432 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9433 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9434 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9435 		break;
9436 
9437 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9438 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9439 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9440 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9441 		break;
9442 
9443 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9444 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9445 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9446 		break;
9447 
9448 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9449 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9450 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9451 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9452 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9453 		break;
9454 
9455 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9456 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9457 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9458 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9459 					&eq_entry->razwi_with_intr_cause, &event_mask);
9460 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9461 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9462 		break;
9463 
9464 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9465 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9466 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9467 						&eq_entry->razwi_with_intr_cause, &event_mask);
9468 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9469 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9470 		break;
9471 
9472 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9473 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9474 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9475 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9476 		break;
9477 
9478 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9479 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9480 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9481 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9482 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9483 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9484 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9485 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9486 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9487 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9488 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9489 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9490 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9491 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9492 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9493 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9494 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9495 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9496 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9497 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9498 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9499 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9500 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9501 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9502 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9503 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9504 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9505 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9506 					&eq_entry->razwi_with_intr_cause, &event_mask);
9507 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9508 		break;
9509 
9510 	case GAUDI2_EVENT_DEC0_SPI:
9511 	case GAUDI2_EVENT_DEC1_SPI:
9512 	case GAUDI2_EVENT_DEC2_SPI:
9513 	case GAUDI2_EVENT_DEC3_SPI:
9514 	case GAUDI2_EVENT_DEC4_SPI:
9515 	case GAUDI2_EVENT_DEC5_SPI:
9516 	case GAUDI2_EVENT_DEC6_SPI:
9517 	case GAUDI2_EVENT_DEC7_SPI:
9518 	case GAUDI2_EVENT_DEC8_SPI:
9519 	case GAUDI2_EVENT_DEC9_SPI:
9520 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9521 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9522 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9523 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9524 		break;
9525 
9526 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9527 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9528 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9529 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9530 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9531 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9532 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9533 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9534 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9535 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9536 		break;
9537 
9538 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9539 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9540 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9541 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9542 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9543 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9544 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9545 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9546 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9547 		break;
9548 
9549 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9550 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9551 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9552 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9553 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9554 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9555 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9556 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9557 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9558 		break;
9559 
9560 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9561 	case GAUDI2_EVENT_KDMA0_CORE:
9562 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9563 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9564 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9565 		break;
9566 
9567 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9568 		index = event_type - GAUDI2_EVENT_HDMA2_CORE;
9569 		error_count = gaudi2_handle_edma_core_event(hdev, event_type, index);
9570 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9571 		break;
9572 
9573 	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9574 		index = event_type - GAUDI2_EVENT_PDMA0_CORE;
9575 		error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index);
9576 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9577 		break;
9578 
9579 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9580 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9581 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9582 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9583 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9584 		break;
9585 
9586 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9587 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9588 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9589 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9590 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9591 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9592 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9593 		break;
9594 
9595 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9596 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9597 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9598 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9599 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9600 		break;
9601 
9602 	case GAUDI2_EVENT_PMMU_FATAL_0:
9603 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9604 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9605 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9606 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9607 		break;
9608 
9609 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9610 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9611 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9612 		break;
9613 
9614 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9615 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9616 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9617 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9618 			reset_required = true;
9619 		}
9620 		error_count++;
9621 		break;
9622 
9623 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9624 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9625 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9626 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9627 		break;
9628 
9629 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9630 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9631 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9632 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9633 		break;
9634 
9635 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9636 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9637 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9638 		break;
9639 
9640 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9641 		error_count = gaudi2_handle_psoc_drain(hdev,
9642 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9643 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9644 		break;
9645 
9646 	case GAUDI2_EVENT_CPU_AXI_ECC:
9647 		error_count = GAUDI2_NA_EVENT_CAUSE;
9648 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9649 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9650 		break;
9651 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9652 		error_count = GAUDI2_NA_EVENT_CAUSE;
9653 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9654 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9655 		break;
9656 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9657 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9658 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9659 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9660 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9661 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9662 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9663 		break;
9664 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9665 		error_count = GAUDI2_NA_EVENT_CAUSE;
9666 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9667 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9668 		break;
9669 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9670 		error_count = GAUDI2_NA_EVENT_CAUSE;
9671 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9672 		break;
9673 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9674 		error_count = GAUDI2_NA_EVENT_CAUSE;
9675 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9676 		break;
9677 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9678 		error_count = GAUDI2_NA_EVENT_CAUSE;
9679 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9680 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9681 		break;
9682 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9683 		error_count = GAUDI2_NA_EVENT_CAUSE;
9684 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9685 		break;
9686 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9687 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9688 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9689 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9690 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9691 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9692 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9693 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9694 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9695 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9696 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9697 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9698 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9699 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9700 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9701 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9702 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9703 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9704 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9705 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9706 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9707 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9708 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9709 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9710 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9711 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9712 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9713 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9714 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9715 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9716 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9717 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9718 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9719 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9720 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9721 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9722 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9723 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9724 		fallthrough;
9725 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9726 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9727 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9728 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9729 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9730 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9731 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9732 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9733 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9734 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9735 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9736 		error_count = GAUDI2_NA_EVENT_CAUSE;
9737 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9738 		break;
9739 
9740 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9741 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9742 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9743 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9744 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9745 		error_count = GAUDI2_NA_EVENT_CAUSE;
9746 		break;
9747 
9748 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9749 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9750 		error_count = GAUDI2_NA_EVENT_CAUSE;
9751 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9752 		break;
9753 
9754 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9755 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9756 		error_count = GAUDI2_NA_EVENT_CAUSE;
9757 		/* Do nothing - FW will handle it */
9758 		break;
9759 
9760 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9761 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9762 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9763 		break;
9764 
9765 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9766 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9767 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9768 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9769 		break;
9770 
9771 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9772 		error_count = GAUDI2_NA_EVENT_CAUSE;
9773 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9774 		break;
9775 
9776 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9777 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9778 						le64_to_cpu(eq_entry->data[0]));
9779 		error_count = GAUDI2_NA_EVENT_CAUSE;
9780 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9781 		break;
9782 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9783 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9784 						le64_to_cpu(eq_entry->data[0]));
9785 		error_count = GAUDI2_NA_EVENT_CAUSE;
9786 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9787 		break;
9788 
9789 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9790 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9791 		error_count = GAUDI2_NA_EVENT_CAUSE;
9792 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9793 		break;
9794 
9795 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9796 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9797 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9798 		break;
9799 
9800 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9801 	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9802 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9803 		error_count = GAUDI2_NA_EVENT_CAUSE;
9804 		is_critical = true;
9805 		break;
9806 
9807 	default:
9808 		if (gaudi2_irq_map_table[event_type].valid) {
9809 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9810 						event_type);
9811 			error_count = GAUDI2_NA_EVENT_CAUSE;
9812 		}
9813 	}
9814 
9815 	/* Make sure to dump an error in case no error cause was printed so far.
9816 	 * Note that although we have counted the errors, the count is used here
9817 	 * only as a boolean.
9818 	 */
9819 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9820 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9821 	else if (error_count == 0)
9822 		gaudi2_print_event(hdev, event_type, true,
9823 				"No error cause for H/W event %u", event_type);
9824 
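	/*
	 * If the event is statically mapped to a reset or the handler requested
	 * one, escalate to a hard reset where required, and actually reset the
	 * device only if hard reset on FW events is enabled or the error is
	 * critical on a secured device.
	 */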
9825 	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9826 				reset_required) {
9827 		if (reset_required ||
9828 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9829 			reset_flags |= HL_DRV_RESET_HARD;
9830 
9831 		if (hdev->hard_reset_on_fw_events ||
9832 				(hdev->asic_prop.fw_security_enabled && is_critical))
9833 			goto reset_device;
9834 	}
9835 
9836 	/* Send unmask irq only for interrupts not classified as MSG */
9837 	if (!gaudi2_irq_map_table[event_type].msg)
9838 		hl_fw_unmask_irq(hdev, event_type);
9839 
9840 	if (event_mask)
9841 		hl_notifier_event_send_all(hdev, event_mask);
9842 
9843 	return;
9844 
9845 reset_device:
9846 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9847 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9848 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9849 	} else {
9850 		reset_flags |= HL_DRV_RESET_DELAY;
9851 	}
9852 	/* escalate general hw errors to critical/fatal error */
9853 	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
9854 		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
9855 
9856 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
9857 	hl_device_cond_reset(hdev, reset_flags, event_mask);
9858 }
9859 
9860 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
9861 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
9862 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
9863 {
9864 	u32 ctl, pkt_size;
9865 	int rc = 0;
9866 
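	/* Build a LIN_DMA packet in memset mode: the source address field
	 * carries the fill value, and WRCOMP makes the engine signal completion
	 * to the SOB configured by the caller.
	 */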
9867 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
9868 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
9869 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
9870 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
9871 
9872 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
9873 	lin_dma_pkt->src_addr = cpu_to_le64(val);
9874 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
9875 	lin_dma_pkt->tsize = cpu_to_le32(size);
9876 
9877 	pkt_size = sizeof(struct packet_lin_dma);
9878 
9879 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
9880 	if (rc)
9881 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
9882 				hw_queue_id);
9883 
9884 	return rc;
9885 }
9886 
9887 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
9888 {
9889 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
9890 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
9891 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
9892 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
9893 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
9894 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
9895 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
9896 	struct asic_fixed_properties *prop = &hdev->asic_prop;
9897 	void *lin_dma_pkts_arr;
9898 	dma_addr_t pkt_dma_addr;
9899 	int rc = 0, dma_num = 0;
9900 
9901 	if (prop->edma_enabled_mask == 0) {
9902 		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
9903 		return -EIO;
9904 	}
9905 
9906 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
9907 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
9908 	comp_addr = CFG_BASE + sob_addr;
9909 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
9910 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
9911 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
9912 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
9913 
9914 	/* Calculate how many lin dma pkts we'll need */
9915 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
9916 	pkt_size = sizeof(struct packet_lin_dma);
9917 
9918 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
9919 					&pkt_dma_addr, GFP_KERNEL);
9920 	if (!lin_dma_pkts_arr)
9921 		return -ENOMEM;
9922 
9923 	/*
9924 	 * Set MMU bypass for the scrubbing - all EDMAs are configured the same,
9925 	 * so save only the first one in order to restore it later.
9926 	 * Also set the SOB address in all EDMA cores for completion.
9927 	 * Set the QM as trusted to allow it to access a physical address with MMU bypass.
9928 	 */
9929 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
9930 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9931 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9932 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9933 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9934 
9935 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9936 				continue;
9937 
9938 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
9939 					edma_offset, mmubp);
9940 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
9941 					lower_32_bits(comp_addr));
9942 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
9943 					upper_32_bits(comp_addr));
9944 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
9945 					comp_val);
9946 			gaudi2_qman_set_test_mode(hdev,
9947 					edma_queues_id[dcore] + 4 * edma_idx, true);
9948 		}
9949 	}
9950 
9951 	WREG32(sob_addr, 0);
9952 
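	/* Split the range into chunks of up to 2GB and issue them round-robin
	 * to all enabled EDMA queues; each completed chunk increments the SOB,
	 * which is polled below until it reaches the number of sent packets.
	 */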
9953 	while (cur_addr < end_addr) {
9954 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9955 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9956 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9957 
9958 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9959 					continue;
9960 
9961 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
9962 
9963 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
9964 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
9965 					pkt_dma_addr + dma_num * pkt_size,
9966 					edma_queues_id[dcore] + edma_idx * 4,
9967 					chunk_size, cur_addr, val);
9968 				if (rc)
9969 					goto end;
9970 
9971 				dma_num++;
9972 				cur_addr += chunk_size;
9973 				if (cur_addr == end_addr)
9974 					break;
9975 			}
9976 		}
9977 	}
9978 
9979 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
9980 	if (rc) {
9981 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
9982 		goto end;
9983 	}
9984 end:
9985 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
9986 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
9987 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
9988 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
9989 
9990 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
9991 				continue;
9992 
9993 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
9994 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
9995 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
9996 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
9997 			gaudi2_qman_set_test_mode(hdev,
9998 					edma_queues_id[dcore] + 4 * edma_idx, false);
9999 		}
10000 	}
10001 
10002 	WREG32(sob_addr, 0);
10003 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10004 
10005 	return rc;
10006 }
10007 
10008 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10009 {
10010 	int rc;
10011 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10012 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
10013 
10014 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10015 
10016 	if (rc)
10017 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10018 				prop->dram_user_base_address, size);
10019 	return rc;
10020 }
10021 
10022 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10023 {
10024 	int rc;
10025 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10026 	u64 val = hdev->memory_scrub_val;
10027 	u64 addr, size;
10028 
10029 	if (!hdev->memory_scrub)
10030 		return 0;
10031 
10032 	/* scrub SRAM */
10033 	addr = prop->sram_user_base_address;
10034 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10035 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10036 			addr, addr + size, val);
10037 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10038 	if (rc) {
10039 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10040 		return rc;
10041 	}
10042 
10043 	/* scrub DRAM */
10044 	rc = gaudi2_scrub_device_dram(hdev, val);
10045 	if (rc) {
10046 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10047 		return rc;
10048 	}
10049 	return 0;
10050 }
10051 
10052 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10053 {
10054 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10055 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10056 	u32 val, size, offset;
10057 	int dcore_id;
10058 
10059 	offset = hdev->asic_prop.first_available_cq[0] * 4;
10060 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10061 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10062 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10063 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10064 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10065 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10066 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10067 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10068 
10069 	/* memset dcore0 CQ registers */
10070 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10071 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10072 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10073 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10074 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10075 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10076 
10077 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10078 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10079 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10080 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10081 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10082 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10083 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10084 
10085 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10086 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10087 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10088 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10089 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10090 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10091 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10092 
10093 		cq_lbw_l_addr += DCORE_OFFSET;
10094 		cq_lbw_h_addr += DCORE_OFFSET;
10095 		cq_lbw_data_addr += DCORE_OFFSET;
10096 		cq_base_l_addr += DCORE_OFFSET;
10097 		cq_base_h_addr += DCORE_OFFSET;
10098 		cq_size_addr += DCORE_OFFSET;
10099 	}
10100 
10101 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10102 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10103 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10104 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10105 
10106 	/* memset dcore0 monitors */
10107 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10108 
10109 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10110 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
10111 
10112 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10113 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10114 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10115 
10116 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10117 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10118 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10119 		mon_sts_addr += DCORE_OFFSET;
10120 		mon_cfg_addr += DCORE_OFFSET;
10121 	}
10122 
10123 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10124 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10125 	val = 0;
10126 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10127 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10128 
10129 	/* memset dcore0 sobs */
10130 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10131 
10132 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10133 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10134 
10135 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10136 		gaudi2_memset_device_lbw(hdev, addr, size, val);
10137 		addr += DCORE_OFFSET;
10138 	}
10139 
10140 	/* Flush all WREG to prevent race */
10141 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10142 }
10143 
10144 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10145 {
10146 	u32 reg_base, hw_queue_id;
10147 
10148 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10149 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10150 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10151 			continue;
10152 
10153 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10154 
10155 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10156 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10157 	}
10158 
10159 	/* Flush all WREG to prevent race */
10160 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10161 }
10162 
10163 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10164 {
10165 	u32 reg_base, hw_queue_id;
10166 
10167 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10168 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10169 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10170 			continue;
10171 
10172 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10173 
10174 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10175 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10176 	}
10177 
10178 	/* Flush all WREG to prevent race */
10179 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10180 }
10181 
10182 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10183 {
10184 	return 0;
10185 }
10186 
10187 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10188 {
10189 }
10190 
10191 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10192 						struct dup_block_ctx *cfg_ctx)
10193 {
10194 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10195 	u8 seq;
10196 	int i;
10197 
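	/* 'seq' is the global instance index across all blocks and is used to test the enable mask */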
10198 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
10199 		seq = block_idx * cfg_ctx->instances + i;
10200 
10201 		/* skip disabled instance */
10202 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10203 			continue;
10204 
10205 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10206 					cfg_ctx->data);
10207 	}
10208 }
10209 
10210 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10211 						u64 mask)
10212 {
10213 	int i;
10214 
10215 	cfg_ctx->enabled_mask = mask;
10216 
10217 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
10218 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
10219 }
10220 
10221 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10222 {
10223 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10224 }
10225 
10226 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10227 {
10228 	void *host_mem_virtual_addr;
10229 	dma_addr_t host_mem_dma_addr;
10230 	u64 reserved_va_base;
10231 	u32 pos, size_left, size_to_dma;
10232 	struct hl_ctx *ctx;
10233 	int rc = 0;
10234 
10235 	/* Fetch the ctx */
10236 	ctx = hl_get_compute_ctx(hdev);
10237 	if (!ctx) {
10238 		dev_err(hdev->dev, "No ctx available\n");
10239 		return -EINVAL;
10240 	}
10241 
10242 	/* Allocate a host buffer to receive the data read through KDMA */
10243 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10244 								GFP_KERNEL | __GFP_ZERO);
10245 	if (host_mem_virtual_addr == NULL) {
10246 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10247 		rc = -ENOMEM;
10248 		goto put_ctx;
10249 	}
10250 
10251 	/* Reserve VM region on asic side */
10252 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10253 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10254 	if (!reserved_va_base) {
10255 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10256 		rc = -ENOMEM;
10257 		goto free_data_buffer;
10258 	}
10259 
10260 	/* Create mapping on asic side */
10261 	mutex_lock(&hdev->mmu_lock);
10262 
10263 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10264 	if (rc) {
10265 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10266 		goto unreserve_va;
10267 	}
10268 
10269 	rc = hl_mmu_invalidate_cache_range(hdev, false,
10270 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10271 				      ctx->asid, reserved_va_base, SZ_2M);
10272 	if (rc) {
10273 		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10274 		goto unreserve_va;
10275 	}
10276 
10277 	mutex_unlock(&hdev->mmu_lock);
10278 
10279 	/* Enable MMU on KDMA */
10280 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10281 
10282 	pos = 0;
10283 	size_left = size;
10284 	size_to_dma = SZ_2M;
10285 
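	/*
	 * Copy the data in chunks of up to 2MB: KDMA writes each chunk into the
	 * mapped host buffer, which is then copied to the output blob.
	 */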
10286 	while (size_left > 0) {
10287 		if (size_left < SZ_2M)
10288 			size_to_dma = size_left;
10289 
10290 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10291 		if (rc)
10292 			break;
10293 
10294 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10295 
10296 		if (size_left <= SZ_2M)
10297 			break;
10298 
10299 		pos += SZ_2M;
10300 		addr += SZ_2M;
10301 		size_left -= SZ_2M;
10302 	}
10303 
10304 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10305 
10306 	mutex_lock(&hdev->mmu_lock);
10307 
10308 	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10309 	if (rc)
10310 		goto unreserve_va;
10311 
10312 	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10313 				      ctx->asid, reserved_va_base, SZ_2M);
10314 
10315 unreserve_va:
10316 	mutex_unlock(&hdev->mmu_lock);
10317 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10318 free_data_buffer:
10319 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10320 put_ctx:
10321 	hl_ctx_put(ctx);
10322 
10323 	return rc;
10324 }
10325 
10326 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10327 {
10328 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10329 	int min_alloc_order, rc;
10330 
10331 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10332 		return 0;
10333 
10334 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10335 								HOST_SPACE_INTERNAL_CB_SZ,
10336 								&hdev->internal_cb_pool_dma_addr,
10337 								GFP_KERNEL | __GFP_ZERO);
10338 
10339 	if (!hdev->internal_cb_pool_virt_addr)
10340 		return -ENOMEM;
10341 
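	/* Pool granularity is the smaller of the signal/wait CB sizes */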
10342 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10343 					gaudi2_get_wait_cb_size(hdev)));
10344 
10345 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10346 	if (!hdev->internal_cb_pool) {
10347 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
10348 		rc = -ENOMEM;
10349 		goto free_internal_cb_pool;
10350 	}
10351 
10352 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10353 				HOST_SPACE_INTERNAL_CB_SZ, -1);
10354 	if (rc) {
10355 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10356 		rc = -EFAULT;
10357 		goto destroy_internal_cb_pool;
10358 	}
10359 
10360 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10361 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10362 
10363 	if (!hdev->internal_cb_va_base) {
10364 		rc = -ENOMEM;
10365 		goto destroy_internal_cb_pool;
10366 	}
10367 
10368 	mutex_lock(&hdev->mmu_lock);
10369 
10370 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10371 					HOST_SPACE_INTERNAL_CB_SZ);
10372 	if (rc)
10373 		goto unreserve_internal_cb_pool;
10374 
10375 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10376 	if (rc)
10377 		goto unmap_internal_cb_pool;
10378 
10379 	mutex_unlock(&hdev->mmu_lock);
10380 
10381 	return 0;
10382 
10383 unmap_internal_cb_pool:
10384 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10385 unreserve_internal_cb_pool:
10386 	mutex_unlock(&hdev->mmu_lock);
10387 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10388 destroy_internal_cb_pool:
10389 	gen_pool_destroy(hdev->internal_cb_pool);
10390 free_internal_cb_pool:
10391 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10392 					hdev->internal_cb_pool_dma_addr);
10393 
10394 	return rc;
10395 }
10396 
10397 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10398 {
10399 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10400 
10401 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10402 		return;
10403 
10404 	mutex_lock(&hdev->mmu_lock);
10405 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10406 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10407 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10408 	mutex_unlock(&hdev->mmu_lock);
10409 
10410 	gen_pool_destroy(hdev->internal_cb_pool);
10411 
10412 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10413 					hdev->internal_cb_pool_dma_addr);
10414 }
10415 
10416 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10417 {
10418 	gaudi2_restore_user_sm_registers(hdev);
10419 	gaudi2_restore_user_qm_registers(hdev);
10420 }
10421 
10422 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10423 {
10424 	struct hl_device *hdev = ctx->hdev;
10425 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10426 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10427 	int rc;
10428 
10429 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10430 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10431 	if (rc)
10432 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10433 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10434 
10435 	return rc;
10436 }
10437 
10438 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10439 {
10440 	struct hl_device *hdev = ctx->hdev;
10441 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10442 	int rc;
10443 
10444 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10445 				prop->pmmu.page_size, true);
10446 	if (rc)
10447 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10448 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10449 }
10450 
10451 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10452 {
10453 	int rc;
10454 
10455 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10456 	if (rc)
10457 		return rc;
10458 
10459 	/* No need to clear the user registers if the device has just been
10460 	 * reset; in that case we restore only the NIC QM registers.
10461 	 */
10462 	if (ctx->hdev->reset_upon_device_release)
10463 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10464 	else
10465 		gaudi2_restore_user_registers(ctx->hdev);
10466 
10467 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10468 	if (rc)
10469 		return rc;
10470 
10471 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10472 	if (rc)
10473 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10474 
10475 	return rc;
10476 }
10477 
10478 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10479 {
10480 	if (ctx->asid == HL_KERNEL_ASID_ID)
10481 		return;
10482 
10483 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10484 
10485 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10486 }
10487 
10488 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10489 {
10490 	struct hl_device *hdev = cs->ctx->hdev;
10491 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10492 	u32 mon_payload, sob_id, mon_id;
10493 
10494 	if (!cs_needs_completion(cs))
10495 		return 0;
10496 
10497 	/*
10498 	 * The first 64 SOB/MON pairs are reserved for the driver's QMAN auto
10499 	 * completion mechanism. Each SOB/MON pair serves the pending CS with the
10500 	 * same cyclic index. The SOB value is incremented as each of the CS jobs
10501 	 * completes; when it reaches the number of CS jobs, the monitor generates
10502 	 * an MSI-X interrupt.
10503 	 */
10504 
10505 	sob_id = mon_id = index;
10506 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10507 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10508 
10509 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10510 				cs->jobs_cnt);
10511 
10512 	return 0;
10513 }
10514 
10515 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10516 {
10517 	return HL_INVALID_QUEUE;
10518 }
10519 
10520 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10521 {
10522 	struct hl_cb *cb = data;
10523 	struct packet_msg_short *pkt;
10524 	u32 value, ctl, pkt_size = sizeof(*pkt);
10525 
10526 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10527 	memset(pkt, 0, pkt_size);
10528 
10529 	/* Inc by 1, Mode ADD */
10530 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10531 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10532 
10533 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10534 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10535 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10536 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10537 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10538 
10539 	pkt->value = cpu_to_le32(value);
10540 	pkt->ctl = cpu_to_le32(ctl);
10541 
10542 	return size + pkt_size;
10543 }
10544 
10545 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10546 {
10547 	u32 ctl, pkt_size = sizeof(*pkt);
10548 
10549 	memset(pkt, 0, pkt_size);
10550 
10551 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10552 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10553 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10554 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10555 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10556 
10557 	pkt->value = cpu_to_le32(value);
10558 	pkt->ctl = cpu_to_le32(ctl);
10559 
10560 	return pkt_size;
10561 }
10562 
10563 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10564 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10565 {
10566 	u32 ctl, value, pkt_size = sizeof(*pkt);
10567 	u8 mask;
10568 
10569 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10570 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10571 		return 0;
10572 	}
10573 
10574 	memset(pkt, 0, pkt_size);
10575 
10576 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10577 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10578 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
10579 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10580 
10581 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10582 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10583 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10584 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10585 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10586 
10587 	pkt->value = cpu_to_le32(value);
10588 	pkt->ctl = cpu_to_le32(ctl);
10589 
10590 	return pkt_size;
10591 }
10592 
10593 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10594 {
10595 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10596 
10597 	memset(pkt, 0, pkt_size);
10598 
10599 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10600 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10601 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10602 
10603 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10604 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10605 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10606 
10607 	pkt->cfg = cpu_to_le32(cfg);
10608 	pkt->ctl = cpu_to_le32(ctl);
10609 
10610 	return pkt_size;
10611 }
10612 
10613 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10614 {
10615 	struct hl_cb *cb = prop->data;
10616 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10617 
10618 	u64 monitor_base, fence_addr = 0;
10619 	u32 stream_index, size = prop->size;
10620 	u16 msg_addr_offset;
10621 
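	/* The fence to wait on is FENCE2 of the queue's stream (4 streams per QMAN) */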
10622 	stream_index = prop->q_idx % 4;
10623 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10624 			QM_FENCE2_OFFSET + stream_index * 4;
10625 
10626 	/*
10627 	 * monitor_base should hold the content of the base0 address registers,
10628 	 * as the msg-short address offsets below are computed relative to it.
10629 	 */
10630 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10631 
10632 	/* First monitor config packet: low address of the sync */
10633 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10634 				monitor_base;
10635 
10636 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10637 
10638 	/* Second monitor config packet: high address of the sync */
10639 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10640 				monitor_base;
10641 
10642 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10643 
10644 	/*
10645 	 * Third monitor config packet: the payload, i.e. what to write when the
10646 	 * sync triggers
10647 	 */
10648 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10649 				monitor_base;
10650 
10651 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10652 
10653 	/* Fourth monitor config packet: bind the monitor to a sync object */
10654 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10655 
10656 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10657 						prop->sob_val, msg_addr_offset);
10658 
10659 	/* Fence packet */
10660 	size += gaudi2_add_fence_pkt(buf + size);
10661 
10662 	return size;
10663 }
10664 
10665 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10666 {
10667 	struct hl_hw_sob *hw_sob = data;
10668 
10669 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10670 
10671 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10672 
10673 	kref_init(&hw_sob->kref);
10674 }
10675 
10676 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10677 {
10678 }
10679 
10680 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10681 {
10682 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10683 
10684 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10685 }
10686 
10687 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10688 {
10689 	return 0;
10690 }
10691 
10692 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10693 					struct hl_cs *cs, u32 wait_queue_id,
10694 					u32 collective_engine_id, u32 encaps_signal_offset)
10695 {
10696 	return -EINVAL;
10697 }
10698 
10699 /*
10700  * gaudi2_mmu_scramble_addr - converts a DRAM (non-power-of-2) page-size aligned
10701  *                            address to a DMMU page-size (64MB) address before
10702  *                            mapping it in the MMU.
10703  * The operation is performed on both the virtual and the physical addresses.
10704  * For a device with 6 HBMs the scramble is:
10705  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10706  *
10707  * Example:
10708  * =============================================================================
10709  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10710  * Phys address                                                     in MMU last
10711  *                                                                    HOP
10712  * =============================================================================
10713  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10714  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10715  * =============================================================================
10716  */
10717 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10718 {
10719 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10720 	u32 divisor, mod_va;
10721 	u64 div_va;
10722 
10723 	/* accept any address in the DRAM address space */
10724 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10725 									VA_HBM_SPACE_END)) {
10726 
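		/*
		 * Per the formula above: divide the address by
		 * (num_functional_hbms * scramble chunk size) and re-pack the
		 * quotient and remainder into the 64MB DMMU page layout.
		 */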
10727 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10728 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10729 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10730 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10731 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10732 	}
10733 
10734 	return raw_addr;
10735 }
10736 
10737 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10738 {
10739 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10740 	u32 divisor, mod_va;
10741 	u64 div_va;
10742 
10743 	/* accept any address in the DRAM address space */
10744 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10745 									VA_HBM_SPACE_END)) {
10746 
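		/*
		 * Reverse of the scramble: split on the 64MB DMMU page size and
		 * recombine using the original divisor.
		 */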
10747 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10748 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10749 					PAGE_SIZE_64MB, &mod_va);
10750 
10751 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10752 					(div_va * divisor + mod_va));
10753 	}
10754 
10755 	return scrambled_addr;
10756 }
10757 
10758 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10759 {
10760 	u32 base = 0, dcore_id, dec_id;
10761 
10762 	if (core_id >= NUMBER_OF_DEC) {
10763 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10764 		goto out;
10765 	}
10766 
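	/* Cores 0-7 are the dcore decoders (NUM_OF_DEC_PER_DCORE per dcore) */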
10767 	if (core_id < 8) {
10768 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10769 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10770 
10771 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10772 				dec_id * DCORE_VDEC_OFFSET;
10773 	} else {
10774 		/* PCIe Shared Decoder */
10775 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10776 	}
10777 out:
10778 	return base;
10779 }
10780 
10781 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10782 				u32 *block_size, u32 *block_id)
10783 {
10784 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10785 	int i;
10786 
10787 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10788 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10789 			*block_id = i;
10790 			if (block_size)
10791 				*block_size = gaudi2->mapped_blocks[i].size;
10792 			return 0;
10793 		}
10794 	}
10795 
10796 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10797 
10798 	return -EINVAL;
10799 }
10800 
10801 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10802 			u32 block_id, u32 block_size)
10803 {
10804 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10805 	u64 offset_in_bar;
10806 	u64 address;
10807 	int rc;
10808 
10809 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10810 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10811 		return -EINVAL;
10812 	}
10813 
10814 	/* we allow mapping only an entire block */
10815 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10816 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10817 		return -EINVAL;
10818 	}
10819 
10820 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10821 
10822 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10823 
10824 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10825 			VM_DONTCOPY | VM_NORESERVE);
10826 
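	/* Map the block's BAR region into the user's VMA */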
10827 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10828 			block_size, vma->vm_page_prot);
10829 	if (rc)
10830 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10831 
10832 	return rc;
10833 }
10834 
10835 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10836 {
10837 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10838 
10839 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10840 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10841 
10842 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10843 		WREG32(irq_handler_offset,
10844 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10845 }
10846 
10847 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10848 {
10849 	switch (mmu_id) {
10850 	case HW_CAP_DCORE0_DMMU0:
10851 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
10852 		break;
10853 	case HW_CAP_DCORE0_DMMU1:
10854 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
10855 		break;
10856 	case HW_CAP_DCORE0_DMMU2:
10857 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
10858 		break;
10859 	case HW_CAP_DCORE0_DMMU3:
10860 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
10861 		break;
10862 	case HW_CAP_DCORE1_DMMU0:
10863 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
10864 		break;
10865 	case HW_CAP_DCORE1_DMMU1:
10866 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
10867 		break;
10868 	case HW_CAP_DCORE1_DMMU2:
10869 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
10870 		break;
10871 	case HW_CAP_DCORE1_DMMU3:
10872 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
10873 		break;
10874 	case HW_CAP_DCORE2_DMMU0:
10875 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
10876 		break;
10877 	case HW_CAP_DCORE2_DMMU1:
10878 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
10879 		break;
10880 	case HW_CAP_DCORE2_DMMU2:
10881 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
10882 		break;
10883 	case HW_CAP_DCORE2_DMMU3:
10884 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
10885 		break;
10886 	case HW_CAP_DCORE3_DMMU0:
10887 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
10888 		break;
10889 	case HW_CAP_DCORE3_DMMU1:
10890 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
10891 		break;
10892 	case HW_CAP_DCORE3_DMMU2:
10893 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
10894 		break;
10895 	case HW_CAP_DCORE3_DMMU3:
10896 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
10897 		break;
10898 	case HW_CAP_PMMU:
10899 		*mmu_base = mmPMMU_HBW_MMU_BASE;
10900 		break;
10901 	default:
10902 		return -EINVAL;
10903 	}
10904 
10905 	return 0;
10906 }
10907 
10908 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
10909 {
10910 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
10911 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10912 	u32 mmu_base;
10913 
10914 	if (!(gaudi2->hw_cap_initialized & mmu_id))
10915 		return;
10916 
10917 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
10918 		return;
10919 
10920 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
10921 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
10922 }
10923 
10924 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
10925 {
10926 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
10927 
10928 	/* check all HMMUs */
10929 	for (i = 0 ; i < num_of_hmmus ; i++) {
10930 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
10931 
10932 		if (mmu_cap_mask & mmu_id)
10933 			gaudi2_ack_mmu_error(hdev, mmu_id);
10934 	}
10935 
10936 	/* check PMMU */
10937 	if (mmu_cap_mask & HW_CAP_PMMU)
10938 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
10939 
10940 	return 0;
10941 }
10942 
10943 static void gaudi2_get_msi_info(__le32 *table)
10944 {
10945 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
10946 }
10947 
10948 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
10949 {
10950 	switch (pll_idx) {
10951 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
10952 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
10953 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
10954 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
10955 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
10956 	case HL_GAUDI2_MME_PLL: return MME_PLL;
10957 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
10958 	case HL_GAUDI2_IF_PLL: return IF_PLL;
10959 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
10960 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
10961 	case HL_GAUDI2_VID_PLL: return VID_PLL;
10962 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
10963 	default: return -EINVAL;
10964 	}
10965 }
10966 
10967 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
10968 {
10969 	/* Not implemented */
10970 	return 0;
10971 }
10972 
10973 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
10974 {
10975 	/* Not implemented */
10976 	return 0;
10977 }
10978 
10979 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
10980 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
10981 {
10982 	/* Not implemented */
10983 	return 0;
10984 }
10985 
10986 
10987 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
10988 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
10989 				u32 engine_id, char **buf, size_t *size, size_t *offset)
10990 {
10991 	/* Not implemented */
10992 	return 0;
10993 }
10994 
10995 
10996 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
10997 	.monitor_valid = gaudi2_monitor_valid,
10998 	.print_single_monitor = gaudi2_print_single_monitor,
10999 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11000 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
11001 };
11002 
11003 static void gaudi2_state_dump_init(struct hl_device *hdev)
11004 {
11005 	/* Not implemented */
11006 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11007 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11008 }
11009 
11010 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11011 {
11012 	return 0;
11013 }
11014 
11015 static u32 *gaudi2_get_stream_master_qid_arr(void)
11016 {
11017 	return NULL;
11018 }
11019 
11020 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11021 				struct attribute_group *dev_vrm_attr_grp)
11022 {
11023 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11024 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11025 }
11026 
11027 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11028 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
11029 {
11030 	struct asic_fixed_properties *prop = &hdev->asic_prop;
11031 
11032 	/* for host pages, the page size must be a multiple of the host MMU page size */
11033 	if (!is_dram_addr) {
11034 		if (page_size % mmu_prop->page_size)
11035 			goto page_size_err;
11036 
11037 		*real_page_size = mmu_prop->page_size;
11038 		return 0;
11039 	}
11040 
11041 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11042 		goto page_size_err;
11043 
11044 	/*
11045 	 * The MMU page size differs from the DRAM page size (more precisely, the DMMU page is
11046 	 * greater than the DRAM page).
11047 	 * For this reason, work with the DRAM page size and let the MMU scrambling routine
11048 	 * handle the mismatch when calculating the address to place in the MMU page table
11049 	 * (the check above also guarantees that the dram_page_size is not greater than the
11050 	 * MMU page size).
11051 	 */
11052 	*real_page_size = prop->dram_page_size;
11053 
11054 	return 0;
11055 
11056 page_size_err:
11057 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11058 							page_size, mmu_prop->page_size >> 10);
11059 	return -EFAULT;
11060 }
11061 
11062 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11063 {
11064 	return -EOPNOTSUPP;
11065 }
11066 
11067 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11068 {
11069 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11070 
11071 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11072 		return 0;
11073 
11074 	return hl_fw_send_device_activity(hdev, open);
11075 }
11076 
11077 static const struct hl_asic_funcs gaudi2_funcs = {
11078 	.early_init = gaudi2_early_init,
11079 	.early_fini = gaudi2_early_fini,
11080 	.late_init = gaudi2_late_init,
11081 	.late_fini = gaudi2_late_fini,
11082 	.sw_init = gaudi2_sw_init,
11083 	.sw_fini = gaudi2_sw_fini,
11084 	.hw_init = gaudi2_hw_init,
11085 	.hw_fini = gaudi2_hw_fini,
11086 	.halt_engines = gaudi2_halt_engines,
11087 	.suspend = gaudi2_suspend,
11088 	.resume = gaudi2_resume,
11089 	.mmap = gaudi2_mmap,
11090 	.ring_doorbell = gaudi2_ring_doorbell,
11091 	.pqe_write = gaudi2_pqe_write,
11092 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11093 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
11094 	.scrub_device_mem = gaudi2_scrub_device_mem,
11095 	.scrub_device_dram = gaudi2_scrub_device_dram,
11096 	.get_int_queue_base = NULL,
11097 	.test_queues = gaudi2_test_queues,
11098 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11099 	.asic_dma_pool_free = gaudi2_dma_pool_free,
11100 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11101 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11102 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
11103 	.asic_dma_map_single = gaudi2_dma_map_single,
11104 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
11105 	.cs_parser = gaudi2_cs_parser,
11106 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
11107 	.add_end_of_cb_packets = NULL,
11108 	.update_eq_ci = gaudi2_update_eq_ci,
11109 	.context_switch = gaudi2_context_switch,
11110 	.restore_phase_topology = gaudi2_restore_phase_topology,
11111 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
11112 	.add_device_attr = gaudi2_add_device_attr,
11113 	.handle_eqe = gaudi2_handle_eqe,
11114 	.get_events_stat = gaudi2_get_events_stat,
11115 	.read_pte = NULL,
11116 	.write_pte = NULL,
11117 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11118 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11119 	.mmu_prefetch_cache_range = NULL,
11120 	.send_heartbeat = gaudi2_send_heartbeat,
11121 	.debug_coresight = gaudi2_debug_coresight,
11122 	.is_device_idle = gaudi2_is_device_idle,
11123 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
11124 	.hw_queues_lock = gaudi2_hw_queues_lock,
11125 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
11126 	.get_pci_id = gaudi2_get_pci_id,
11127 	.get_eeprom_data = gaudi2_get_eeprom_data,
11128 	.get_monitor_dump = gaudi2_get_monitor_dump,
11129 	.send_cpu_message = gaudi2_send_cpu_message,
11130 	.pci_bars_map = gaudi2_pci_bars_map,
11131 	.init_iatu = gaudi2_init_iatu,
11132 	.rreg = hl_rreg,
11133 	.wreg = hl_wreg,
11134 	.halt_coresight = gaudi2_halt_coresight,
11135 	.ctx_init = gaudi2_ctx_init,
11136 	.ctx_fini = gaudi2_ctx_fini,
11137 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
11138 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11139 	.load_firmware_to_device = NULL,
11140 	.load_boot_fit_to_device = NULL,
11141 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
11142 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
11143 	.gen_signal_cb = gaudi2_gen_signal_cb,
11144 	.gen_wait_cb = gaudi2_gen_wait_cb,
11145 	.reset_sob = gaudi2_reset_sob,
11146 	.reset_sob_group = gaudi2_reset_sob_group,
11147 	.get_device_time = gaudi2_get_device_time,
11148 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
11149 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11150 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11151 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
11152 	.scramble_addr = gaudi2_mmu_scramble_addr,
11153 	.descramble_addr = gaudi2_mmu_descramble_addr,
11154 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11155 	.get_hw_block_id = gaudi2_get_hw_block_id,
11156 	.hw_block_mmap = gaudi2_block_mmap,
11157 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
11158 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11159 	.get_msi_info = gaudi2_get_msi_info,
11160 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11161 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11162 	.init_firmware_loader = gaudi2_init_firmware_loader,
11163 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11164 	.state_dump_init = gaudi2_state_dump_init,
11165 	.get_sob_addr = gaudi2_get_sob_addr,
11166 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11167 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11168 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11169 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11170 	.access_dev_mem = hl_access_dev_mem,
11171 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
11172 	.set_engine_cores = gaudi2_set_engine_cores,
11173 	.set_engines = gaudi2_set_engines,
11174 	.send_device_activity = gaudi2_send_device_activity,
11175 	.set_dram_properties = gaudi2_set_dram_properties,
11176 	.set_binning_masks = gaudi2_set_binning_masks,
11177 };
11178 
11179 void gaudi2_set_asic_funcs(struct hl_device *hdev)
11180 {
11181 	hdev->asic_funcs = &gaudi2_funcs;
11182 }
11183