xref: /openbmc/linux/drivers/accel/habanalabs/gaudi2/gaudi2.c (revision 724ba6751532055db75992fc6ae21c3e322e94a7)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
/*
 * Sizing and timing constants.
 *
 * The unit of every timeout/wait value is encoded in its name suffix
 * (_MSEC / _USEC) and repeated in the trailing comment.
 *
 * Every *_PLDM_* value is larger than its regular counterpart.
 * NOTE(review): PLDM presumably denotes a slow pre-silicon/emulation platform,
 * which would explain the longer values - confirm.
 */
#define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */

#define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */

#define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
#define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
#define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
#define GAUDI2_RESET_POLL_CNT			3
#define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
#define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
#define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
#define GAUDI2_CB_POOL_CB_CNT			512
#define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
#define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
#define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
#define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
#define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */

#define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
43 
/*
 * The code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
 * and relies on that value (for array sizes etc.), so a separate value is defined
 * here for the maximum number of faulty TPCs, reflecting the cluster binning
 * requirements.
 */
#define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
#define MAX_FAULTY_XBARS			1
#define MAX_FAULTY_EDMAS			1
#define MAX_FAULTY_DECODERS			1

/* "All units present" bit masks: 25 bits (TPC), 16 bits (HIF/HMMU), 10 bits (decoder) */
#define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
#define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
#define GAUDI2_DECODER_FULL_MASK		0x3FF
57 
/* NOTE(review): presumably marks an event cause as "not applicable" - confirm against users */
#define GAUDI2_NA_EVENT_CAUSE			0xFF
/*
 * Number of distinct causes per error/interrupt source.
 * NOTE(review): presumably these size the cause-description tables and bound the
 * loops that decode cause registers elsewhere in this file - confirm.
 */
#define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
#define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
#define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
#define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
#define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
#define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
#define GAUDI2_NUM_OF_TPC_INTR_CAUSE		30
#define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
#define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
#define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
#define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
#define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
#define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
#define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
#define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
#define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
#define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
#define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5

/* MMU timeouts, expressed as multiples of the generic MMU_CONFIG_TIMEOUT_USEC */
#define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
#define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
/* Raw value; its unit is not stated here */
#define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)

/* Video decoder timeout; the PLDM variant is 100 times longer */
#define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
#define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)

/* One second, in microseconds */
#define KDMA_TIMEOUT_USEC			USEC_PER_SEC
89 
/*
 * Idle/halt predicates.
 *
 * Each macro takes the raw value(s) of the relevant status register(s) and
 * tests them against the matching mask. All of them evaluate to 0 or 1, except
 * IS_DMA_HALTED() which yields the masked IS_HALT bit itself (zero / non-zero).
 */

/* DMA core is idle when the BUSY bit of STS0 is clear */
#define IS_DMA_IDLE(dma_core_sts0)	\
	(((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)) == 0)

/* Non-zero when the IS_HALT bit of STS1 is set */
#define IS_DMA_HALTED(dma_core_sts1)	\
	((DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK) & (dma_core_sts1))

/* MME is idle when every bit of MME_ARCH_IDLE_MASK is set */
#define IS_MME_IDLE(mme_arch_sts) \
	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)

/* TPC is idle when every bit of TPC_IDLE_MASK is set */
#define IS_TPC_IDLE(tpc_cfg_sts) \
	((TPC_IDLE_MASK) == ((tpc_cfg_sts) & (TPC_IDLE_MASK)))

/* QMAN is idle only when the QM, its ARC and the CGM all report idle */
#define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
	(((QM_IDLE_MASK) == ((qm_glbl_sts0) & (QM_IDLE_MASK))) && \
	 ((QM_ARC_IDLE_MASK) == ((qm_glbl_sts1) & (QM_ARC_IDLE_MASK))) && \
	 ((CGM_IDLE_MASK) == ((qm_cgm_sts) & (CGM_IDLE_MASK))))

#define PCIE_DEC_EN_MASK			0x300

/* Values of the SW_WORK_STATE field of the decoder SWREG15 register */
#define DEC_WORK_STATE_IDLE			0
#define DEC_WORK_STATE_PEND			3

/* Decoder counts as idle in both the IDLE and the PEND work states */
#define IS_DEC_IDLE(dec_swreg15) \
	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == \
		DEC_WORK_STATE_IDLE || \
	 ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == \
		DEC_WORK_STATE_PEND)
111 
/* HBM MMU address scrambling parameters */
#define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
#define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
#define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
#define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
#define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
/*
 * Bit positions used for MMU range invalidation.
 * NOTE(review): presumably the VA shifts select the address bits written to the
 * range registers and the EN/ASID shifts are field offsets in the control
 * register - confirm against the code that programs them.
 */
#define MMU_RANGE_INV_VA_LSB_SHIFT		12
#define MMU_RANGE_INV_VA_MSB_SHIFT		44
#define MMU_RANGE_INV_EN_SHIFT			0
#define MMU_RANGE_INV_ASID_EN_SHIFT		1
#define MMU_RANGE_INV_ASID_SHIFT		2

/*
 * The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in
 * PMMU because it has a 2-entry FIFO. It is therefore left out of the PMMU
 * enable mask (bits 0..N-2), while the HMMU mask enables all N causes
 * (bits 0..N-1), where N is GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE.
 */
#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)

#define GAUDI2_MAX_STRING_LEN			64

/* Inclusive count of IRQ numbers from DCORE0_DEC0_NRM up to SHARED_DEC1_ABNRM */
#define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)

/* Distance between the engine ids of the same engine in two consecutive DCOREs */
#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
136 
/*
 * RAZWI initiator coordinates.
 *
 * Both macros isolate the coordinate fields of a raw AXUSER value and shift
 * them down by 4, so the result lines up with the RAZWI_INITIATOR_AXUER_*
 * layout (low X at bit 0, low Y at bit 5, high X at bit 23):
 *
 * RAZWI_GET_AXUSER_XY     - keeps raw bits [12:4] (low X/Y) and [31:27] (high X)
 * RAZWI_GET_AXUSER_LOW_XY - keeps raw bits [12:4] (low X/Y) only
 *
 * The argument is parenthesized so that an expression argument (e.g. "a | b")
 * is masked as a whole rather than being split by operator precedence.
 */
#define RAZWI_GET_AXUSER_XY(x) \
	(((x) & 0xF8001FF0) >> 4)

#define RAZWI_GET_AXUSER_LOW_XY(x) \
	(((x) & 0x00001FF0) >> 4)
143 
/*
 * Field layout of a RAZWI initiator id:
 *   low X  - 5 bits starting at bit 0
 *   low Y  - 4 bits starting at bit 5
 *   high X - 5 bits starting at bit 23
 */
#define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
#define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF
#define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5

#define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F
#define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23

/* Pack the low X and low Y coordinates; out-of-range bits are masked off */
#define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
	((((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT) | \
	 (((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT))

/* Pack the high X coordinate only */
#define RAZWI_INITIATOR_ID_X_HIGH(x) \
	(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)

/* Full id: low X (xl), low Y (yl) and high X (xh) combined */
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
	(RAZWI_INITIATOR_ID_X_HIGH(xh) | RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl))
161 
/*
 * NOTE(review): presumably the size of the buffer used to build the engine-name
 * string and the maximum number of engines reported per router - confirm
 * against the users of these constants.
 */
#define PSOC_RAZWI_ENG_STR_SIZE 128
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5

/**
 * struct gaudi2_razwi_info - describes one possible RAZWI initiator.
 * @axuser_xy: initiator coordinates, as built by RAZWI_INITIATOR_ID_X_Y() or
 *             RAZWI_INITIATOR_ID_X_Y_LOW() in the tables of this file.
 * @rtr_ctrl: base address of the router control block (an mm*_CTRL_BASE value)
 *            paired with the initiator.
 * @eng_id: engine id (a GAUDI2_*ENGINE_ID_* value) associated with the initiator.
 * @eng_name: human-readable initiator name.
 */
struct gaudi2_razwi_info {
	u32 axuser_xy;
	u32 rtr_ctrl;
	u16 eng_id;
	char *eng_name;
};
171 
172 static struct gaudi2_razwi_info common_razwi_info[] = {
173 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
174 				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
175 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
176 				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
177 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
178 				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
179 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
180 				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
181 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
182 				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
183 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
184 				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
185 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
186 				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
187 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
188 				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
189 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
190 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
191 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
192 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
193 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
194 				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
195 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
196 				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
197 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
198 				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
199 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
200 				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
201 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
202 				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
203 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
204 				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
205 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
206 				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
207 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
208 				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
209 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
210 				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
211 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
212 				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
213 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
214 				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
215 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
216 				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
217 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
218 				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
219 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
220 				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
221 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
222 				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
223 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
224 				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
225 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
226 				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
227 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
228 				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
229 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
230 				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
231 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
232 				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
233 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
234 				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
235 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
236 				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
237 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
238 				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
239 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
240 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
241 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
242 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
243 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
244 				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
245 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
246 				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
247 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
248 				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
249 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
250 				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
251 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
252 				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
253 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
254 				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
255 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
256 				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
257 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
258 				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
259 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
260 				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
261 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
262 				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
263 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
264 				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
265 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
266 				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
267 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
268 				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
269 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
270 				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
271 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
272 				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
273 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
274 				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
275 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
276 				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
277 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
278 				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
279 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
280 				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
281 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
282 				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
283 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
284 				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
285 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
286 				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
287 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
288 				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
289 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
290 				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
291 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
292 				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
293 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
294 				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
295 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
296 				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
297 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
298 				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
299 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
301 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
303 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
305 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
307 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
309 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
311 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
312 				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
313 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
314 				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
315 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
317 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
319 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
321 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
323 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
325 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
327 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
328 				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
329 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
330 				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
331 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 				GAUDI2_ENGINE_ID_PSOC, "CPU"},
333 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
334 				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
335 };
336 
337 static struct gaudi2_razwi_info mme_razwi_info[] = {
338 		/* MME X high coordinate is N/A, hence using only low coordinates */
339 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
340 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
341 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
342 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
343 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
344 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
345 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
346 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
347 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
348 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
349 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
350 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
351 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
352 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
353 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
354 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
355 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
356 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
357 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
358 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
359 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
360 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
361 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
362 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
363 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
364 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
365 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
366 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
367 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
368 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
369 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
370 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
371 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
372 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
373 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
374 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
375 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
376 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
377 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
378 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
379 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
380 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
381 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
382 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
383 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
384 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
385 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
386 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
387 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
388 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
389 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
390 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
391 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
392 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
393 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
394 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
395 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
396 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
397 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
398 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
399 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
400 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
401 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
402 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
403 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
404 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
405 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
406 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
407 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
408 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
409 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
410 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
411 };
412 
/* PMMU fatal error causes, numbered from 0 */
enum hl_pmmu_fatal_cause {
	LATENCY_RD_OUT_FIFO_OVERRUN = 0,
	LATENCY_WR_OUT_FIFO_OVERRUN = 1
};
417 
/* PCIE AXI drain indication causes (LBW first, then HBW), numbered from 0 */
enum hl_pcie_drain_ind_cause {
	LBW_AXI_DRAIN_IND = 0,
	HBW_AXI_DRAIN_IND = 1
};
422 
423 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
424 	[HBM_ID0] = 0xFFFC,
425 	[HBM_ID1] = 0xFFCF,
426 	[HBM_ID2] = 0xF7F7,
427 	[HBM_ID3] = 0x7F7F,
428 	[HBM_ID4] = 0xFCFF,
429 	[HBM_ID5] = 0xCFFF,
430 };
431 
/*
 * Maps an XBAR edge index (0..3) to an HBM cluster id.
 *
 * Only indices 0..3 are initialized explicitly; any further element implied by
 * the EDMA_ID_SIZE dimension is zero-initialized.
 * NOTE(review): sizing this table by EDMA_ID_SIZE looks like a leftover from
 * edma_to_hbm_cluster - confirm whether the number of XBAR edges was intended.
 */
static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
	[0] = HBM_ID0,
	[1] = HBM_ID1,
	[2] = HBM_ID4,
	[3] = HBM_ID5,
};
438 
/*
 * Maps an EDMA instance id to an HBM cluster id.
 * HBM_ID2 and HBM_ID3 are each referenced by two EDMA instances, all other
 * clusters by a single one.
 */
static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
};
449 
450 static const int gaudi2_qman_async_event_id[] = {
451 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
452 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
453 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
454 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
455 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
456 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
457 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
458 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
459 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
460 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
461 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
462 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
463 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
464 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
465 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
466 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
467 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
468 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
469 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
470 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
471 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
472 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
473 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
474 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
475 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
476 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
477 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
478 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
479 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
480 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
481 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
482 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
483 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
484 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
485 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
486 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
487 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
488 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
489 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
490 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
491 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
492 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
493 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
494 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
495 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
496 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
497 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
498 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
499 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
500 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
501 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
502 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
503 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
504 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
505 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
506 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
507 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
508 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
509 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
510 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
511 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
512 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
513 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
514 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
515 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
516 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
517 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
518 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
519 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
520 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
521 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
522 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
523 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
524 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
525 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
526 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
527 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
528 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
529 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
530 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
531 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
532 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
533 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
534 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
535 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
536 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
537 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
538 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
539 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
540 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
541 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
542 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
543 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
544 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
545 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
546 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
547 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
548 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
549 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
550 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
551 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
552 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
553 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
554 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
555 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
556 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
557 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
558 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
559 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
560 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
561 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
562 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
563 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
564 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
565 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
566 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
567 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
568 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
569 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
570 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
571 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
572 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
573 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
574 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
575 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
576 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
577 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
578 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
579 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
580 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
581 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
582 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
583 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
584 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
585 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
586 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
587 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
588 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
589 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
590 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
591 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
592 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
593 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
594 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
595 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
596 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
597 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
598 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
599 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
600 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
601 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
602 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
603 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
604 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
605 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
606 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
607 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
608 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
609 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
610 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
611 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
612 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
613 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
614 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
615 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
616 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
617 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
618 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
619 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
620 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
621 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
622 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
623 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
624 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
625 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
626 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
627 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
628 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
629 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
630 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
631 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
632 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
633 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
634 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
635 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
636 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
637 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
638 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
639 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
640 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
641 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
642 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
643 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
644 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
645 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
646 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
647 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
648 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
649 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
650 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
651 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
652 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
653 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
654 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
655 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
656 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
657 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
658 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
659 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
660 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
661 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
662 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
663 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
664 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
665 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
666 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
667 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
668 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
669 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
670 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
671 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
672 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
673 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
674 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
675 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
676 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
677 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
678 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
679 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
680 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
681 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
682 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
683 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
684 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
685 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
686 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
687 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
688 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
689 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
690 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
691 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
692 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
693 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
694 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
695 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
696 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
697 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
698 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
699 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
700 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
701 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
702 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
703 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
704 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
705 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
706 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
708 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
709 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
710 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
711 };
712 
713 static const int gaudi2_dma_core_async_event_id[] = {
714 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
715 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
716 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
717 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
718 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
719 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
720 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
721 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
722 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
723 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
724 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
725 };
726 
727 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
728 	"qman sei intr",
729 	"arc sei intr"
730 };
731 
732 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
733 	"AXI_TERMINATOR WR",
734 	"AXI_TERMINATOR RD",
735 	"AXI SPLIT SEI Status"
736 };
737 
738 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
739 	"cbu_bresp_sei_intr_cause",
740 	"cbu_rresp_sei_intr_cause",
741 	"lbu_bresp_sei_intr_cause",
742 	"lbu_rresp_sei_intr_cause",
743 	"cbu_axi_split_intr_cause",
744 	"lbu_axi_split_intr_cause",
745 	"arc_ip_excptn_sei_intr_cause",
746 	"dmi_bresp_sei_intr_cause",
747 	"aux2apb_err_sei_intr_cause",
748 	"cfg_lbw_wr_terminated_intr_cause",
749 	"cfg_lbw_rd_terminated_intr_cause",
750 	"cfg_dccm_wr_terminated_intr_cause",
751 	"cfg_dccm_rd_terminated_intr_cause",
752 	"cfg_hbw_rd_terminated_intr_cause"
753 };
754 
755 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
756 	"msix_vcd_hbw_sei",
757 	"msix_l2c_hbw_sei",
758 	"msix_nrm_hbw_sei",
759 	"msix_abnrm_hbw_sei",
760 	"msix_vcd_lbw_sei",
761 	"msix_l2c_lbw_sei",
762 	"msix_nrm_lbw_sei",
763 	"msix_abnrm_lbw_sei",
764 	"apb_vcd_lbw_sei",
765 	"apb_l2c_lbw_sei",
766 	"apb_nrm_lbw_sei",
767 	"apb_abnrm_lbw_sei",
768 	"dec_sei",
769 	"dec_apb_sei",
770 	"trc_apb_sei",
771 	"lbw_mstr_if_sei",
772 	"axi_split_bresp_err_sei",
773 	"hbw_axi_wr_viol_sei",
774 	"hbw_axi_rd_viol_sei",
775 	"lbw_axi_wr_viol_sei",
776 	"lbw_axi_rd_viol_sei",
777 	"vcd_spi",
778 	"l2c_spi",
779 	"nrm_spi",
780 	"abnrm_spi",
781 };
782 
783 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
784 	"PQ AXI HBW error",
785 	"CQ AXI HBW error",
786 	"CP AXI HBW error",
787 	"CP error due to undefined OPCODE",
788 	"CP encountered STOP OPCODE",
789 	"CP AXI LBW error",
790 	"CP WRREG32 or WRBULK returned error",
791 	"N/A",
792 	"FENCE 0 inc over max value and clipped",
793 	"FENCE 1 inc over max value and clipped",
794 	"FENCE 2 inc over max value and clipped",
795 	"FENCE 3 inc over max value and clipped",
796 	"FENCE 0 dec under min value and clipped",
797 	"FENCE 1 dec under min value and clipped",
798 	"FENCE 2 dec under min value and clipped",
799 	"FENCE 3 dec under min value and clipped",
800 	"CPDMA Up overflow",
801 	"PQC L2H error"
802 };
803 
804 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
805 	"RSVD0",
806 	"CQ AXI HBW error",
807 	"CP AXI HBW error",
808 	"CP error due to undefined OPCODE",
809 	"CP encountered STOP OPCODE",
810 	"CP AXI LBW error",
811 	"CP WRREG32 or WRBULK returned error",
812 	"N/A",
813 	"FENCE 0 inc over max value and clipped",
814 	"FENCE 1 inc over max value and clipped",
815 	"FENCE 2 inc over max value and clipped",
816 	"FENCE 3 inc over max value and clipped",
817 	"FENCE 0 dec under min value and clipped",
818 	"FENCE 1 dec under min value and clipped",
819 	"FENCE 2 dec under min value and clipped",
820 	"FENCE 3 dec under min value and clipped",
821 	"CPDMA Up overflow",
822 	"RSVD17",
823 	"CQ_WR_IFIFO_CI_ERR",
824 	"CQ_WR_CTL_CI_ERR",
825 	"ARC_CQF_RD_ERR",
826 	"ARC_CQ_WR_IFIFO_CI_ERR",
827 	"ARC_CQ_WR_CTL_CI_ERR",
828 	"ARC_AXI_ERR",
829 	"CP_SWITCH_WDT_ERR"
830 };
831 
832 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
833 	"Choice push while full error",
834 	"Choice Q watchdog error",
835 	"MSG AXI LBW returned with error"
836 };
837 
838 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
839 	"qm_axi_err",
840 	"qm_trace_fence_events",
841 	"qm_sw_err",
842 	"qm_cp_sw_stop",
843 	"lbw_mstr_rresp_err",
844 	"lbw_mstr_bresp_err",
845 	"lbw_msg_slverr",
846 	"hbw_msg_slverr",
847 	"wbc_slverr",
848 	"hbw_mstr_rresp_err",
849 	"hbw_mstr_bresp_err",
850 	"sb_resp_intr",
851 	"mrsb_resp_intr",
852 	"core_dw_status_0",
853 	"core_dw_status_1",
854 	"core_dw_status_2",
855 	"core_dw_status_3",
856 	"core_dw_status_4",
857 	"core_dw_status_5",
858 	"core_dw_status_6",
859 	"core_dw_status_7",
860 	"async_arc2cpu_sei_intr",
861 };
862 
863 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
864 	"tpc_address_exceed_slm",
865 	"tpc_div_by_0",
866 	"tpc_spu_mac_overflow",
867 	"tpc_spu_addsub_overflow",
868 	"tpc_spu_abs_overflow",
869 	"tpc_spu_fma_fp_dst_nan",
870 	"tpc_spu_fma_fp_dst_inf",
871 	"tpc_spu_convert_fp_dst_nan",
872 	"tpc_spu_convert_fp_dst_inf",
873 	"tpc_spu_fp_dst_denorm",
874 	"tpc_vpu_mac_overflow",
875 	"tpc_vpu_addsub_overflow",
876 	"tpc_vpu_abs_overflow",
877 	"tpc_vpu_convert_fp_dst_nan",
878 	"tpc_vpu_convert_fp_dst_inf",
879 	"tpc_vpu_fma_fp_dst_nan",
880 	"tpc_vpu_fma_fp_dst_inf",
881 	"tpc_vpu_fp_dst_denorm",
882 	"tpc_assertions",
883 	"tpc_illegal_instruction",
884 	"tpc_pc_wrap_around",
885 	"tpc_qm_sw_err",
886 	"tpc_hbw_rresp_err",
887 	"tpc_hbw_bresp_err",
888 	"tpc_lbw_rresp_err",
889 	"tpc_lbw_bresp_err",
890 	"st_unlock_already_locked",
891 	"invalid_lock_access",
892 	"LD_L protection violation",
893 	"ST_L protection violation",
894 };
895 
896 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
897 	"agu_resp_intr",
898 	"qman_axi_err",
899 	"wap sei (wbc axi err)",
900 	"arc sei",
901 	"cfg access error",
902 	"qm_sw_err",
903 	"sbte_dbg_intr_0",
904 	"sbte_dbg_intr_1",
905 	"sbte_dbg_intr_2",
906 	"sbte_dbg_intr_3",
907 	"sbte_dbg_intr_4",
908 	"sbte_prtn_intr_0",
909 	"sbte_prtn_intr_1",
910 	"sbte_prtn_intr_2",
911 	"sbte_prtn_intr_3",
912 	"sbte_prtn_intr_4",
913 };
914 
915 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
916 	"i0",
917 	"i1",
918 	"i2",
919 	"i3",
920 	"i4",
921 };
922 
923 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
924 	"WBC ERR RESP_0",
925 	"WBC ERR RESP_1",
926 	"AP SOURCE POS INF",
927 	"AP SOURCE NEG INF",
928 	"AP SOURCE NAN",
929 	"AP RESULT POS INF",
930 	"AP RESULT NEG INF",
931 };
932 
933 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
934 	"HBW Read returned with error RRESP",
935 	"HBW write returned with error BRESP",
936 	"LBW write returned with error BRESP",
937 	"descriptor_fifo_overflow",
938 	"KDMA SB LBW Read returned with error",
939 	"KDMA WBC LBW Write returned with error",
940 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
941 	"WRONG CFG FOR COMMIT IN LIN DMA"
942 };
943 
944 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
945 	"HBW/LBW Read returned with error RRESP",
946 	"HBW/LBW write returned with error BRESP",
947 	"LBW write returned with error BRESP",
948 	"descriptor_fifo_overflow",
949 	"KDMA SB LBW Read returned with error",
950 	"KDMA WBC LBW Write returned with error",
951 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
952 	"WRONG CFG FOR COMMIT IN LIN DMA"
953 };
954 
955 struct gaudi2_sm_sei_cause_data {
956 	const char *cause_name;
957 	const char *log_name;
958 };
959 
960 static const struct gaudi2_sm_sei_cause_data
961 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
962 	{"calculated SO value overflow/underflow", "SOB ID"},
963 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
964 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
965 };
966 
967 static const char * const
968 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
969 	"LATENCY_RD_OUT_FIFO_OVERRUN",
970 	"LATENCY_WR_OUT_FIFO_OVERRUN",
971 };
972 
973 static const char * const
974 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
975 	"LATENCY_RD_OUT_FIFO_OVERRUN",
976 	"LATENCY_WR_OUT_FIFO_OVERRUN",
977 };
978 
979 static const char * const
980 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
981 	"AXI drain HBW",
982 	"AXI drain LBW",
983 };
984 
985 static const char * const
986 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
987 	"HBW error response",
988 	"LBW error response",
989 	"TLP is blocked by RR"
990 };
991 
992 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
993 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
994 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
995 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
996 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
997 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
998 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
999 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1000 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1001 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1002 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1003 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1004 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1005 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1006 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1007 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1008 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1009 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1010 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1011 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1012 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1013 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1014 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1015 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1016 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1017 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1018 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1019 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1020 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1021 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1022 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1023 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1024 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1025 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1026 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1027 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1028 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1029 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1030 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1031 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1032 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1033 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1034 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1035 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1036 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1037 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1038 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1039 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1040 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1041 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1042 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1043 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1044 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1045 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1046 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1047 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1048 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1049 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1050 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1051 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1052 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1053 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1054 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1055 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1056 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1057 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1058 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1059 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1060 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1061 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1062 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1063 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1064 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1065 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1066 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1067 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1068 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1069 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1070 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1071 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1072 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1073 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1074 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1075 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1076 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1077 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1078 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1079 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1080 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1081 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1082 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1083 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1084 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1085 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1086 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1087 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1088 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1089 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1090 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1091 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1092 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1093 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1094 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1095 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1096 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1097 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1098 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1099 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1100 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1101 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1102 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1103 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1104 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1105 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1106 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1107 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1108 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1109 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1110 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1111 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1112 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1113 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1114 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1115 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1116 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1117 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1118 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1119 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1120 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1121 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1122 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1123 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1124 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1125 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1126 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1127 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1128 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1129 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1130 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1131 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1132 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1133 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1134 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1135 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1136 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1137 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1138 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1139 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1140 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1141 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1142 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1143 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1144 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1145 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1146 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1147 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1148 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1149 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1150 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1151 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1152 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1153 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1154 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1155 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1156 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1157 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1158 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1159 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1160 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1161 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1162 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1163 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1164 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1165 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1166 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1167 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1168 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1169 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1170 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1171 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1172 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1173 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1174 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1175 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1176 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1177 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1178 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1179 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1180 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1181 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1182 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1183 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1184 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1185 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1186 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1187 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1188 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1189 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1190 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1191 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1192 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1193 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1194 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1195 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1196 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1197 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1198 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1199 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1200 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1201 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1202 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1203 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1204 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1205 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1206 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1207 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1208 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1209 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1210 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1211 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1212 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1213 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1214 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1215 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1216 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1217 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1218 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1219 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1220 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1221 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1222 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1223 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1224 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1225 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1226 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1227 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1228 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1229 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1230 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1231 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1232 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1233 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1234 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1235 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1236 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1237 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1238 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1239 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1240 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1241 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1242 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1243 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1244 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1245 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1246 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1247 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1248 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1249 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1250 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1251 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1252 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1253 };
1254 
1255 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1256 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1257 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1258 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1259 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1260 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1261 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1262 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1263 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1264 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1265 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1266 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1267 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1268 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1269 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1270 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1271 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1272 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1273 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1274 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1275 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1276 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1277 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1278 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1279 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1280 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1281 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1282 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1283 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1284 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1285 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1286 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1287 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1288 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1289 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1290 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1291 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1292 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1293 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1294 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1295 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1296 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1297 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1298 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1299 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1300 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1301 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1302 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1303 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1304 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1305 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1306 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1307 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1308 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1309 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1310 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1311 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1312 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1313 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1314 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1315 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1316 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1317 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1318 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1319 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1320 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1321 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1322 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1323 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1324 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1325 };
1326 
1327 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1328 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1329 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1330 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1331 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1332 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1333 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1334 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1335 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1336 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1337 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1338 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1339 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1340 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1341 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1342 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1343 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1344 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1345 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1346 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1347 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1348 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1349 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1350 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1351 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1352 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1353 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1354 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1355 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1356 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1357 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1358 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1359 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1360 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1361 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1362 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1363 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1364 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1365 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1366 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1367 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1368 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1369 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1370 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1371 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1372 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1373 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1374 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1375 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1376 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1377 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1378 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1379 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1380 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1381 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1382 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1383 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1384 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1385 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1386 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1387 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1388 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1389 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1390 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1391 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1392 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1393 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1394 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1395 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1396 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1397 };
1398 
1399 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1400 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1401 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1402 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1403 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1404 };
1405 
1406 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1407 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1408 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1409 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1410 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1411 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1412 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1413 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1414 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1415 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1416 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1417 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1418 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1419 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1420 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1421 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1422 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1423 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1424 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1425 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1426 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1427 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1428 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1429 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1430 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1431 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1432 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1433 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1434 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1435 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1436 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1437 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1438 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1439 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1440 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1441 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1442 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1443 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1444 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1445 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1446 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1447 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1448 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1449 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1450 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1451 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1452 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1453 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1454 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1455 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1456 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1457 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1458 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1459 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1460 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1461 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1462 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1463 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1464 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1465 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1466 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1467 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1468 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1469 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1470 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1471 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1472 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1473 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1474 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1475 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1476 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1477 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1478 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1479 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1480 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1481 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1482 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1483 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1484 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1485 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1486 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1487 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1488 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1489 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1490 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1491 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1492 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1493 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1494 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1495 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1496 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1497 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1498 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1499 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1500 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1501 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1502 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1503 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1504 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1505 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1506 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1507 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1508 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1509 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1510 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1511 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1512 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1513 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1514 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1515 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1516 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1517 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1518 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1519 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1520 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1521 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1522 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1523 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1524 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1525 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1526 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1527 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1528 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1529 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1530 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1531 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1532 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1533 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1534 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1535 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1536 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1537 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1538 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1539 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1540 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1541 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1542 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1543 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1544 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1545 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1546 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1547 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1548 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1549 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1550 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1551 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1552 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1553 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1554 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1555 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1556 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1557 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1558 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1559 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1560 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1561 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1562 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1563 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1564 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1565 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1566 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1567 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1568 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1569 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1570 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1571 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1572 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1573 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1574 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1575 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1576 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1577 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1578 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1579 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1580 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1581 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1582 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1583 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1584 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1585 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1586 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1587 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1588 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1589 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1590 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1591 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1592 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1593 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1594 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1595 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1596 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1597 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1598 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1599 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1600 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1601 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1602 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1603 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1604 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1605 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1606 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1607 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1608 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1609 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1610 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1611 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1612 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1613 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1614 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1615 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1616 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1617 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1618 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1619 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1620 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1621 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1622 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1623 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1624 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1625 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1626 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1627 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1628 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1629 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1630 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1631 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1632 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1633 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1634 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1635 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1636 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1637 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1638 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1639 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1640 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1641 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1642 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1643 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1644 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1645 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1646 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1647 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1648 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1649 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1650 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1651 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1652 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1653 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1654 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1655 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1656 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1657 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1658 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1659 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1660 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1661 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1662 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1663 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1664 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1665 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1666 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1667 };
1668 
1669 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1670 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1671 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1672 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1673 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1674 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1675 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1676 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1677 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1678 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1679 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1680 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1681 };
1682 
1683 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1684 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1685 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1686 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1687 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1688 };
1689 
1690 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1691 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1692 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1693 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1694 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1695 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1696 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1697 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1698 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1699 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1700 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1701 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1702 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1703 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1704 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1705 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1706 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1707 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1708 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1709 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1710 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1711 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1712 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1713 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1714 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1715 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1716 };
1717 
1718 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1719 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1720 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1721 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1722 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1723 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1724 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1725 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1726 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1727 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1728 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1729 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1730 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1731 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1732 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1733 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1734 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1735 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1736 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1737 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1738 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1739 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1740 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1741 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1742 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1743 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1744 };
1745 
1746 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1747 	[ROTATOR_ID_0] = mmROT0_BASE,
1748 	[ROTATOR_ID_1] = mmROT1_BASE
1749 };
1750 
1751 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1752 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1753 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1754 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1755 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1756 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1757 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1758 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1759 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1760 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1761 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1762 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1763 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1764 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1765 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1766 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1767 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1768 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1769 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1770 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1771 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1772 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1773 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1774 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1775 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1776 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1777 };
1778 
1779 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1780 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1781 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1782 };
1783 
1784 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1785 	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1786 	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1787 	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1788 	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1789 	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1790 	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1791 	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1792 	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1793 	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1794 	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1795 	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1796 	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1797 	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1798 	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1799 	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1800 	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1801 	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1802 	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1803 	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1804 	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1805 	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1806 	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1807 	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1808 	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1809 	/* the PCI TPC is placed last (mapped liked HW) */
1810 	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1811 };
1812 
1813 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1814 	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1815 	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1816 	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1817 	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1818 };
1819 
1820 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1821 	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1822 	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1823 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1824 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1825 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1826 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1827 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1828 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1829 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1830 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1831 	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1832 };
1833 
1834 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1835 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1836 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1837 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1838 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1839 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1840 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1841 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1842 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1843 };
1844 
1845 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1846 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1847 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1848 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1849 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1850 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1851 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1852 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1853 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1854 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1855 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1856 };
1857 
/*
 * Router ids: eight per DCORE, numbered consecutively from 0, so
 * DCOREn_RTRm has the value n * 8 + m.
 */
enum rtr_id {
	/* DCORE0: 0..7 */
	DCORE0_RTR0, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR3,
	DCORE0_RTR4, DCORE0_RTR5, DCORE0_RTR6, DCORE0_RTR7,
	/* DCORE1: 8..15 */
	DCORE1_RTR0, DCORE1_RTR1, DCORE1_RTR2, DCORE1_RTR3,
	DCORE1_RTR4, DCORE1_RTR5, DCORE1_RTR6, DCORE1_RTR7,
	/* DCORE2: 16..23 */
	DCORE2_RTR0, DCORE2_RTR1, DCORE2_RTR2, DCORE2_RTR3,
	DCORE2_RTR4, DCORE2_RTR5, DCORE2_RTR6, DCORE2_RTR7,
	/* DCORE3: 24..31 */
	DCORE3_RTR0, DCORE3_RTR1, DCORE3_RTR2, DCORE3_RTR3,
	DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR7,
};
1892 
/*
 * HBW router ID (enum rtr_id) per TPC initiator. Each of the first four rows
 * holds the routers of one DCORE (six TPCs per row).
 * NOTE(review): the extra trailing entry presumably belongs to the additional
 * DCORE0 TPC (TPC6) - confirm against the users of this table.
 */
static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
	DCORE0_RTR0
};
1900 
/*
 * LBW router ID (enum rtr_id) per TPC initiator. Each of the first four rows
 * holds the routers of one DCORE (six TPCs per row).
 * NOTE(review): the extra trailing entry presumably belongs to the additional
 * DCORE0 TPC (TPC6) - confirm against the users of this table.
 */
static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
	DCORE0_RTR0
};
1908 
/* HBW router ID (enum rtr_id) per decoder initiator, one entry per decoder */
static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
};
1913 
/* LBW router ID (enum rtr_id) per decoder initiator, one entry per decoder */
static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
};
1918 
/* HBW router ID (enum rtr_id) per NIC macro initiator, one entry per NIC macro */
static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
1923 
/*
 * LBW router ID (enum rtr_id) per NIC macro initiator, one entry per NIC macro.
 * The entries are identical to those of the HBW table.
 */
static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
};
1928 
/*
 * Register block base of the SFT HBW router interface used by each EDMA
 * initiator, one entry per EDMA. Note that the IF0/IF1 ordering of SFT0 and
 * SFT1 (IF1 first) is the opposite of the ordering of SFT2 and SFT3 (IF0 first).
 */
static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
};
1939 
/* HBW router ID (enum rtr_id) per PDMA initiator */
static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR0, DCORE0_RTR0
};
1943 
/* LBW router ID (enum rtr_id) per PDMA initiator */
static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
	DCORE0_RTR2, DCORE0_RTR2
};
1947 
/* HBW router ID (enum rtr_id) per rotator initiator */
static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR0, DCORE3_RTR7
};
1951 
/* LBW router ID (enum rtr_id) per rotator initiator */
static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
	DCORE2_RTR2, DCORE3_RTR5
};
1955 
/*
 * Router ID of every initiator of a single MME. The members appear in the same
 * order as the constants of enum mme_initiators.
 */
struct mme_initiators_rtr_id {
	u32 wap0;
	u32 wap1;
	u32 write;
	u32 read;
	u32 sbte0;
	u32 sbte1;
	u32 sbte2;
	u32 sbte3;
	u32 sbte4;
};
1967 
/* Initiators of a single MME; MME_INITIATORS_MAX is the number of initiators */
enum mme_initiators {
	MME_WAP0,
	MME_WAP1,
	MME_WRITE,
	MME_READ,
	MME_SBTE0,
	MME_SBTE1,
	MME_SBTE2,
	MME_SBTE3,
	MME_SBTE4,

	/* must stay last */
	MME_INITIATORS_MAX
};
1980 
1981 static const struct mme_initiators_rtr_id
1982 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1983 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1984 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1985 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1986 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1987 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1988 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1989 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1990 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1991 };
1992 
/* Engine types that can be the source of a RAZWI event */
enum razwi_event_sources {
	RAZWI_TPC,
	RAZWI_MME,
	RAZWI_EDMA,
	RAZWI_PDMA,
	RAZWI_NIC,
	RAZWI_DEC,
	RAZWI_ROT,
};
2002 
/* Pairs a cause bit mask with a human readable description of that cause */
struct hbm_mc_error_causes {
	u32 mask;
	char cause[50];
};
2007 
2008 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2009 
2010 /* Special blocks iterator is currently used to configure security protection bits,
2011  * and read global errors. Most HW blocks are addressable and those who aren't (N/A)-
2012  * must be skipped. Following configurations are commonly used for both PB config
2013  * and global error reading, since currently they both share the same settings.
2014  * Once it changes, we must remember to use separate configurations for either one.
2015  */
2016 static int gaudi2_iterator_skip_block_types[] = {
2017 		GAUDI2_BLOCK_TYPE_PLL,
2018 		GAUDI2_BLOCK_TYPE_EU_BIST,
2019 		GAUDI2_BLOCK_TYPE_HBM,
2020 		GAUDI2_BLOCK_TYPE_XFT
2021 };
2022 
2023 static struct range gaudi2_iterator_skip_block_ranges[] = {
2024 		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2025 		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2026 		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2027 		/* Skip all CPU blocks except for CPU_IF */
2028 		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2029 		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2030 };
2031 
2032 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2033 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2034 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2035 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2036 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2037 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2038 };
2039 
/* Description of every HBM SEI cause, indexed by the HBM_SEI_* cause ID */
static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
	[HBM_SEI_READ_ERR] = "SEI read data error",
	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
	[HBM_SEI_DFI] = "SEI DFI error",
	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
};
2051 
/*
 * Description of a single MMU SPI/SEI cause.
 * The gaudi2_mmu_spi_sei table uses clear_bit as the INTERRUPT_CLR bit index
 * of the cause, with -1 marking causes that have no clear register bit.
 */
struct mmu_spi_sei_cause {
	char cause[50];
	int clear_bit;
};
2056 
2057 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2058 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
2059 	{"page access", 1},		/* INTERRUPT_CLR[1] */
2060 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
2061 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
2062 	{"mmu rei0", -1},		/* no clear register bit */
2063 	{"mmu rei1", -1},		/* no clear register bit */
2064 	{"stlb rei0", -1},		/* no clear register bit */
2065 	{"stlb rei1", -1},		/* no clear register bit */
2066 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
2067 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
2068 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
2069 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
2070 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2071 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2072 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2073 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2074 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
2075 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
2076 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
2077 };
2078 
/*
 * Parameters of a cache invalidation request.
 * NOTE(review): field meanings below are inferred from the names only -
 * confirm against the code that fills and consumes this structure.
 */
struct gaudi2_cache_invld_params {
	u64 start_va;			/* presumably first VA of the invalidated range */
	u64 end_va;			/* presumably last VA of the invalidated range */
	u32 inv_start_val;
	u32 flags;
	bool range_invalidation;	/* presumably selects range vs. full invalidation */
};
2086 
/*
 * Context bundle for TPC idle-status collection.
 * NOTE(review): presumably handed to the per-TPC callback of
 * gaudi2_iterate_tpcs() - confirm against gaudi2_get_tpc_idle_status().
 */
struct gaudi2_tpc_idle_data {
	struct engines_data *e;
	unsigned long *mask;
	bool *is_idle;
	const char *tpc_fmt;
};
2093 
/*
 * Context bundle for TPC MMU configuration.
 * NOTE(review): rw_asid presumably carries the read/write ASID value to
 * program - confirm against its users.
 */
struct gaudi2_tpc_mmu_data {
	u32 rw_asid;
};
2097 
/* State dump spec properties, SP_MAX entries, all zero-initialized */
static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2099 
/* Forward declarations of file-local (static) helpers */
static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
										bool is_memset);
static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e);
static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2116 
/* HBM scrambler init hook - currently a no-op, no register is programmed here */
static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
{
}
2121 
2122 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2123 {
2124 	return sizeof(struct packet_msg_short);
2125 }
2126 
2127 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2128 {
2129 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2130 }
2131 
2132 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2133 {
2134 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2135 	int dcore, inst, tpc_seq;
2136 	u32 offset;
2137 
2138 	/* init the return code */
2139 	ctx->rc = 0;
2140 
2141 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2142 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2143 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2144 
2145 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2146 				continue;
2147 
2148 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2149 
2150 			ctx->fn(hdev, dcore, inst, offset, ctx);
2151 			if (ctx->rc) {
2152 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2153 							dcore, inst);
2154 				return;
2155 			}
2156 		}
2157 	}
2158 
2159 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2160 		return;
2161 
2162 	/* special check for PCI TPC (DCORE0_TPC6) */
2163 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2164 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2165 	if (ctx->rc)
2166 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2167 }
2168 
2169 static bool gaudi2_host_phys_addr_valid(u64 addr)
2170 {
2171 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2172 		return true;
2173 
2174 	return false;
2175 }
2176 
2177 static int set_number_of_functional_hbms(struct hl_device *hdev)
2178 {
2179 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2180 	u8 faulty_hbms = hweight64(hdev->dram_binning);
2181 
2182 	/* check if all HBMs should be used */
2183 	if (!faulty_hbms) {
2184 		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
2185 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2186 		return 0;
2187 	}
2188 
2189 	/*
2190 	 * check for error condition in which number of binning
2191 	 * candidates is higher than the maximum supported by the
2192 	 * driver (in which case binning mask shall be ignored and driver will
2193 	 * set the default)
2194 	 */
2195 	if (faulty_hbms > MAX_FAULTY_HBMS) {
2196 		dev_err(hdev->dev,
2197 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2198 			MAX_FAULTY_HBMS, hdev->dram_binning);
2199 		return -EINVAL;
2200 	}
2201 
2202 	/*
2203 	 * by default, number of functional HBMs in Gaudi2 is always
2204 	 * GAUDI2_HBM_NUM - 1.
2205 	 */
2206 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2207 	return 0;
2208 }
2209 
2210 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2211 {
2212 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2213 	u32 basic_hbm_page_size;
2214 	int rc;
2215 
2216 	rc = set_number_of_functional_hbms(hdev);
2217 	if (rc)
2218 		return -EINVAL;
2219 
2220 	/*
2221 	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
2222 	 * in which we are using x16 bigger page size to be able to populate the entire
2223 	 * HBM mappings in the TLB
2224 	 */
2225 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2226 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2227 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2228 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2229 	prop->dram_base_address = DRAM_PHYS_BASE;
2230 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2231 	prop->dram_supports_virtual_memory = true;
2232 
2233 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2234 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2235 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2236 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2237 
2238 	/* since DRAM page size differs from DMMU page size we need to allocate
2239 	 * DRAM memory in units of dram_page size and mapping this memory in
2240 	 * units of DMMU page size. we overcome this size mismatch using a
2241 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
2242 	 * page.
2243 	 * We therefore:
2244 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
2245 	 *    (suppose we get n such pages)
2246 	 * 2. limit the amount of virtual address space we got from 1 above to
2247 	 *    a multiple of 64M as we don't want the scrambled address to cross
2248 	 *    the DRAM virtual address space.
2249 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
2250 	 * 3. determine the and address accordingly
2251 	 *    end_addr = start_addr + m * 48M
2252 	 *
2253 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
2254 	 */
2255 	prop->dmmu.start_addr = prop->dram_base_address +
2256 			(prop->dram_page_size *
2257 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2258 
2259 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2260 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2261 
2262 	return 0;
2263 }
2264 
2265 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2266 {
2267 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2268 	struct hw_queue_properties *q_props;
2269 	u32 num_sync_stream_queues = 0;
2270 	int i;
2271 
2272 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2273 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2274 					GFP_KERNEL);
2275 
2276 	if (!prop->hw_queues_props)
2277 		return -ENOMEM;
2278 
2279 	q_props = prop->hw_queues_props;
2280 
2281 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2282 		q_props[i].type = QUEUE_TYPE_HW;
2283 		q_props[i].driver_only = 0;
2284 
2285 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2286 			q_props[i].supports_sync_stream = 0;
2287 		} else {
2288 			q_props[i].supports_sync_stream = 1;
2289 			num_sync_stream_queues++;
2290 		}
2291 
2292 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2293 	}
2294 
2295 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2296 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2297 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2298 
2299 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2300 	prop->cfg_base_address = CFG_BASE;
2301 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2302 	prop->host_base_address = HOST_PHYS_BASE_0;
2303 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2304 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2305 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2306 	prop->user_dec_intr_count = NUMBER_OF_DEC;
2307 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2308 	prop->completion_mode = HL_COMPLETION_MODE_CS;
2309 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2310 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2311 
2312 	prop->sram_base_address = SRAM_BASE_ADDR;
2313 	prop->sram_size = SRAM_SIZE;
2314 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2315 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2316 
2317 	prop->hints_range_reservation = true;
2318 
2319 	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2320 
2321 	if (hdev->pldm)
2322 		prop->mmu_pgt_size = 0x800000; /* 8MB */
2323 	else
2324 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2325 
2326 	prop->mmu_pte_size = HL_PTE_SIZE;
2327 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2328 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2329 
2330 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2331 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2332 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2333 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2334 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2335 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2336 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2337 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2338 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2339 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2340 	prop->dmmu.page_size = PAGE_SIZE_1GB;
2341 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2342 	prop->dmmu.last_mask = LAST_MASK;
2343 	prop->dmmu.host_resident = 1;
2344 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2345 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2346 
2347 	/*
2348 	 * this is done in order to be able to validate FW descriptor (i.e. validating that
2349 	 * the addresses and allocated space for FW image does not cross memory bounds).
2350 	 * for this reason we set the DRAM size to the minimum possible and later it will
2351 	 * be modified according to what reported in the cpucp info packet
2352 	 */
2353 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2354 
2355 	hdev->pmmu_huge_range = true;
2356 	prop->pmmu.host_resident = 1;
2357 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2358 	prop->pmmu.last_mask = LAST_MASK;
2359 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2360 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2361 
2362 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2363 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2364 	prop->hints_host_hpage_reserved_va_range.start_addr =
2365 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2366 	prop->hints_host_hpage_reserved_va_range.end_addr =
2367 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2368 
2369 	if (PAGE_SIZE == SZ_64K) {
2370 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2371 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2372 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2373 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2374 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2375 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2376 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2377 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2378 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2379 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2380 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2381 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2382 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2383 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2384 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2385 
2386 		/* shifts and masks are the same in PMMU and HPMMU */
2387 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2388 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2389 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2390 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2391 	} else {
2392 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2393 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2394 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2395 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2396 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2397 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2398 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2399 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2400 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2401 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2402 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2403 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2404 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2405 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2406 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2407 
2408 		/* shifts and masks are the same in PMMU and HPMMU */
2409 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2410 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2411 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2412 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2413 	}
2414 
2415 	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2416 	prop->num_engine_cores = CPU_ID_MAX;
2417 	prop->cfg_size = CFG_SIZE;
2418 	prop->max_asid = MAX_ASID;
2419 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2420 
2421 	prop->supports_engine_modes = true;
2422 
2423 	prop->dc_power_default = DC_POWER_DEFAULT;
2424 
2425 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2426 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2427 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2428 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2429 
2430 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2431 
2432 	prop->mme_master_slave_mode = 1;
2433 
2434 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2435 					(num_sync_stream_queues * HL_RSVD_SOBS);
2436 
2437 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2438 					(num_sync_stream_queues * HL_RSVD_MONS);
2439 
2440 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2441 	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2442 	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2443 
2444 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2445 
2446 	prop->fw_cpu_boot_dev_sts0_valid = false;
2447 	prop->fw_cpu_boot_dev_sts1_valid = false;
2448 	prop->hard_reset_done_by_fw = false;
2449 	prop->gic_interrupts_enable = true;
2450 
2451 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2452 
2453 	prop->max_dec = NUMBER_OF_DEC;
2454 
2455 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2456 
2457 	prop->dma_mask = 64;
2458 
2459 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2460 
2461 	return 0;
2462 }
2463 
2464 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2465 {
2466 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2467 	bool is_wc[3] = {false, false, true};
2468 	int rc;
2469 
2470 	rc = hl_pci_bars_map(hdev, name, is_wc);
2471 	if (rc)
2472 		return rc;
2473 
2474 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2475 
2476 	return 0;
2477 }
2478 
2479 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2480 {
2481 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2482 	struct hl_inbound_pci_region pci_region;
2483 	u64 old_addr = addr;
2484 	int rc;
2485 
2486 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2487 		return old_addr;
2488 
2489 	if (hdev->asic_prop.iatu_done_by_fw)
2490 		return U64_MAX;
2491 
2492 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2493 	pci_region.mode = PCI_BAR_MATCH_MODE;
2494 	pci_region.bar = DRAM_BAR_ID;
2495 	pci_region.addr = addr;
2496 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2497 	if (rc)
2498 		return U64_MAX;
2499 
2500 	if (gaudi2) {
2501 		old_addr = gaudi2->dram_bar_cur_addr;
2502 		gaudi2->dram_bar_cur_addr = addr;
2503 	}
2504 
2505 	return old_addr;
2506 }
2507 
2508 static int gaudi2_init_iatu(struct hl_device *hdev)
2509 {
2510 	struct hl_inbound_pci_region inbound_region;
2511 	struct hl_outbound_pci_region outbound_region;
2512 	u32 bar_addr_low, bar_addr_high;
2513 	int rc;
2514 
2515 	if (hdev->asic_prop.iatu_done_by_fw)
2516 		return 0;
2517 
2518 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2519 	 * We must map this region in BAR match mode in order to
2520 	 * fetch BAR physical base address
2521 	 */
2522 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2523 	inbound_region.bar = SRAM_CFG_BAR_ID;
2524 	/* Base address must be aligned to Bar size which is 256 MB */
2525 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2526 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2527 	if (rc)
2528 		return rc;
2529 
2530 	/* Fetch physical BAR address */
2531 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2532 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2533 
2534 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2535 
2536 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2537 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2538 	inbound_region.bar = SRAM_CFG_BAR_ID;
2539 	inbound_region.offset_in_bar = 0;
2540 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2541 	inbound_region.size = CFG_REGION_SIZE;
2542 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2543 	if (rc)
2544 		return rc;
2545 
2546 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2547 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2548 	inbound_region.bar = SRAM_CFG_BAR_ID;
2549 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2550 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2551 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2552 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2553 	if (rc)
2554 		return rc;
2555 
2556 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2557 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2558 	inbound_region.bar = DRAM_BAR_ID;
2559 	inbound_region.addr = DRAM_PHYS_BASE;
2560 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2561 	if (rc)
2562 		return rc;
2563 
2564 	/* Outbound Region 0 - Point to Host */
2565 	outbound_region.addr = HOST_PHYS_BASE_0;
2566 	outbound_region.size = HOST_PHYS_SIZE_0;
2567 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2568 
2569 	return rc;
2570 }
2571 
2572 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2573 {
2574 	return RREG32(mmHW_STATE);
2575 }
2576 
2577 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2578 {
2579 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2580 
2581 	/*
2582 	 * check for error condition in which number of binning candidates
2583 	 * is higher than the maximum supported by the driver
2584 	 */
2585 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2586 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2587 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2588 					hdev->tpc_binning);
2589 		return -EINVAL;
2590 	}
2591 
2592 	prop->tpc_binning_mask = hdev->tpc_binning;
2593 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2594 
2595 	return 0;
2596 }
2597 
2598 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2599 {
2600 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2601 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2602 	u64 tpc_binning_mask;
2603 	u8 subst_idx = 0;
2604 	int i, rc;
2605 
2606 	rc = gaudi2_tpc_binning_init_prop(hdev);
2607 	if (rc)
2608 		return rc;
2609 
2610 	tpc_binning_mask = prop->tpc_binning_mask;
2611 
2612 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2613 		u8 subst_seq, binned, qid_base;
2614 
2615 		if (tpc_binning_mask == 0)
2616 			break;
2617 
2618 		if (subst_idx == 0) {
2619 			subst_seq = TPC_ID_DCORE0_TPC6;
2620 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2621 		} else {
2622 			subst_seq = TPC_ID_DCORE3_TPC5;
2623 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2624 		}
2625 
2626 
2627 		/* clear bit from mask */
2628 		binned = __ffs(tpc_binning_mask);
2629 		/*
2630 		 * Coverity complains about possible out-of-bound access in
2631 		 * clear_bit
2632 		 */
2633 		if (binned >= TPC_ID_SIZE) {
2634 			dev_err(hdev->dev,
2635 				"Invalid binned TPC (binning mask: %llx)\n",
2636 				tpc_binning_mask);
2637 			return -EINVAL;
2638 		}
2639 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2640 
2641 		/* also clear replacing TPC bit from enabled mask */
2642 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2643 
2644 		/* bin substite TPC's Qs */
2645 		q_props[qid_base].binned = 1;
2646 		q_props[qid_base + 1].binned = 1;
2647 		q_props[qid_base + 2].binned = 1;
2648 		q_props[qid_base + 3].binned = 1;
2649 
2650 		subst_idx++;
2651 	}
2652 
2653 	return 0;
2654 }
2655 
2656 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2657 {
2658 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2659 	u8 num_faulty;
2660 
2661 	num_faulty = hweight32(hdev->decoder_binning);
2662 
2663 	/*
2664 	 * check for error condition in which number of binning candidates
2665 	 * is higher than the maximum supported by the driver
2666 	 */
2667 	if (num_faulty > MAX_FAULTY_DECODERS) {
2668 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2669 						hdev->decoder_binning);
2670 		return -EINVAL;
2671 	}
2672 
2673 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2674 
2675 	if (prop->decoder_binning_mask)
2676 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2677 	else
2678 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2679 
2680 	return 0;
2681 }
2682 
2683 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2684 {
2685 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2686 
2687 	/* check if we should override default binning */
2688 	if (!hdev->dram_binning) {
2689 		prop->dram_binning_mask = 0;
2690 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2691 		return;
2692 	}
2693 
2694 	/* set DRAM binning constraints */
2695 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2696 	prop->dram_binning_mask = hdev->dram_binning;
2697 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2698 }
2699 
2700 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2701 {
2702 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2703 	struct hw_queue_properties *q_props;
2704 	u8 seq, num_faulty;
2705 
2706 	num_faulty = hweight32(hdev->edma_binning);
2707 
2708 	/*
2709 	 * check for error condition in which number of binning candidates
2710 	 * is higher than the maximum supported by the driver
2711 	 */
2712 	if (num_faulty > MAX_FAULTY_EDMAS) {
2713 		dev_err(hdev->dev,
2714 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2715 			hdev->edma_binning);
2716 		return -EINVAL;
2717 	}
2718 
2719 	if (!hdev->edma_binning) {
2720 		prop->edma_binning_mask = 0;
2721 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2722 		return 0;
2723 	}
2724 
2725 	seq = __ffs((unsigned long)hdev->edma_binning);
2726 
2727 	/* set binning constraints */
2728 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2729 	prop->edma_binning_mask = hdev->edma_binning;
2730 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2731 
2732 	/* bin substitute EDMA's queue */
2733 	q_props = prop->hw_queues_props;
2734 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2735 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2736 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2737 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2738 
2739 	return 0;
2740 }
2741 
2742 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2743 {
2744 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2745 	u8 num_faulty, seq;
2746 
2747 	/* check if we should override default binning */
2748 	if (!xbar_edge_iso_mask) {
2749 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2750 		return 0;
2751 	}
2752 
2753 	/*
2754 	 * note that it can be set to value other than 0 only after cpucp packet (i.e.
2755 	 * only the FW can set a redundancy value). for user it'll always be 0.
2756 	 */
2757 	num_faulty = hweight32(xbar_edge_iso_mask);
2758 
2759 	/*
2760 	 * check for error condition in which number of binning candidates
2761 	 * is higher than the maximum supported by the driver
2762 	 */
2763 	if (num_faulty > MAX_FAULTY_XBARS) {
2764 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2765 									MAX_FAULTY_XBARS);
2766 		return -EINVAL;
2767 	}
2768 
2769 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2770 
2771 	/* set binning constraints */
2772 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2773 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2774 
2775 	return 0;
2776 }
2777 
2778 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2779 {
2780 	int rc;
2781 
2782 	/*
2783 	 * mark all clusters as good, each component will "fail" cluster
2784 	 * based on eFuse/user values.
2785 	 * If more than single cluster is faulty- the chip is unusable
2786 	 */
2787 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2788 
2789 	gaudi2_set_dram_binning_masks(hdev);
2790 
2791 	rc = gaudi2_set_edma_binning_masks(hdev);
2792 	if (rc)
2793 		return rc;
2794 
2795 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2796 	if (rc)
2797 		return rc;
2798 
2799 
2800 	/* always initially set to full mask */
2801 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2802 
2803 	return 0;
2804 }
2805 
2806 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2807 {
2808 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2809 	int rc;
2810 
2811 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2812 	if (rc)
2813 		return rc;
2814 
2815 	/* if we have DRAM binning reported by FW we should perform cluster config  */
2816 	if (prop->faulty_dram_cluster_map) {
2817 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2818 
2819 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2820 	}
2821 
2822 	return 0;
2823 }
2824 
/*
 * Set all binning masks: cluster (DRAM/EDMA/XBAR), then TPC, then decoder.
 * Stops at the first helper that fails and returns its error code.
 */
static int gaudi2_set_binning_masks(struct hl_device *hdev)
{
	int err;

	err = gaudi2_set_cluster_binning_masks(hdev);
	if (err)
		return err;

	err = gaudi2_set_tpc_binning_masks(hdev);
	if (err)
		return err;

	return gaudi2_set_dec_binning_masks(hdev);
}
2843 
2844 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2845 {
2846 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2847 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2848 	long max_power;
2849 	u64 dram_size;
2850 	int rc;
2851 
2852 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2853 		return 0;
2854 
2855 	/* No point of asking this information again when not doing hard reset, as the device
2856 	 * CPU hasn't been reset
2857 	 */
2858 	if (hdev->reset_info.in_compute_reset)
2859 		return 0;
2860 
2861 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2862 										mmCPU_BOOT_ERR1);
2863 	if (rc)
2864 		return rc;
2865 
2866 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2867 	if (dram_size) {
2868 		/* we can have wither 5 or 6 HBMs. other values are invalid */
2869 
2870 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2871 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2872 			dev_err(hdev->dev,
2873 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2874 				dram_size, prop->dram_size);
2875 			dram_size = prop->dram_size;
2876 		}
2877 
2878 		prop->dram_size = dram_size;
2879 		prop->dram_end_address = prop->dram_base_address + dram_size;
2880 	}
2881 
2882 	if (!strlen(prop->cpucp_info.card_name))
2883 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2884 
2885 	/* Overwrite binning masks with the actual binning values from F/W */
2886 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2887 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2888 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2889 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2890 
2891 	dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2892 			hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2893 			hdev->decoder_binning);
2894 
2895 	/*
2896 	 * at this point the DRAM parameters need to be updated according to data obtained
2897 	 * from the FW
2898 	 */
2899 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2900 	if (rc)
2901 		return rc;
2902 
2903 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2904 	if (rc)
2905 		return rc;
2906 
2907 	max_power = hl_fw_get_max_power(hdev);
2908 	if (max_power < 0)
2909 		return max_power;
2910 
2911 	prop->max_power_default = (u64) max_power;
2912 
2913 	return 0;
2914 }
2915 
2916 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2917 {
2918 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2919 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2920 	int rc;
2921 
2922 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2923 		return 0;
2924 
2925 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2926 	if (rc)
2927 		return rc;
2928 
2929 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2930 
2931 	return 0;
2932 }
2933 
2934 static int gaudi2_early_init(struct hl_device *hdev)
2935 {
2936 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2937 	struct pci_dev *pdev = hdev->pdev;
2938 	resource_size_t pci_bar_size;
2939 	int rc;
2940 
2941 	rc = gaudi2_set_fixed_properties(hdev);
2942 	if (rc)
2943 		return rc;
2944 
2945 	/* Check BAR sizes */
2946 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2947 
2948 	if (pci_bar_size != CFG_BAR_SIZE) {
2949 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2950 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2951 		rc = -ENODEV;
2952 		goto free_queue_props;
2953 	}
2954 
2955 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2956 	if (pci_bar_size != MSIX_BAR_SIZE) {
2957 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2958 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2959 		rc = -ENODEV;
2960 		goto free_queue_props;
2961 	}
2962 
2963 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2964 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2965 
2966 	/*
2967 	 * Only in pldm driver config iATU
2968 	 */
2969 	if (hdev->pldm)
2970 		hdev->asic_prop.iatu_done_by_fw = false;
2971 	else
2972 		hdev->asic_prop.iatu_done_by_fw = true;
2973 
2974 	rc = hl_pci_init(hdev);
2975 	if (rc)
2976 		goto free_queue_props;
2977 
2978 	/* Before continuing in the initialization, we need to read the preboot
2979 	 * version to determine whether we run with a security-enabled firmware
2980 	 */
2981 	rc = hl_fw_read_preboot_status(hdev);
2982 	if (rc) {
2983 		if (hdev->reset_on_preboot_fail)
2984 			/* we are already on failure flow, so don't check if hw_fini fails. */
2985 			hdev->asic_funcs->hw_fini(hdev, true, false);
2986 		goto pci_fini;
2987 	}
2988 
2989 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2990 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2991 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2992 		if (rc) {
2993 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2994 			goto pci_fini;
2995 		}
2996 	}
2997 
2998 	return 0;
2999 
3000 pci_fini:
3001 	hl_pci_fini(hdev);
3002 free_queue_props:
3003 	kfree(hdev->asic_prop.hw_queues_props);
3004 	return rc;
3005 }
3006 
3007 static int gaudi2_early_fini(struct hl_device *hdev)
3008 {
3009 	kfree(hdev->asic_prop.hw_queues_props);
3010 	hl_pci_fini(hdev);
3011 
3012 	return 0;
3013 }
3014 
3015 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3016 {
3017 	switch (arc_id) {
3018 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3019 		return true;
3020 	default:
3021 		return false;
3022 	}
3023 }
3024 
3025 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3026 {
3027 	switch (arc_id) {
3028 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3029 		return true;
3030 	default:
3031 		return false;
3032 	}
3033 }
3034 
3035 static void gaudi2_init_arcs(struct hl_device *hdev)
3036 {
3037 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3038 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3039 	u64 arc_id;
3040 	u32 i;
3041 
3042 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3043 		if (gaudi2_is_arc_enabled(hdev, i))
3044 			continue;
3045 
3046 		gaudi2_set_arc_id_cap(hdev, i);
3047 	}
3048 
3049 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3050 		if (!gaudi2_is_queue_enabled(hdev, i))
3051 			continue;
3052 
3053 		arc_id = gaudi2_queue_id_to_arc_id[i];
3054 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3055 			continue;
3056 
3057 		if (gaudi2_is_arc_nic_owned(arc_id) &&
3058 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3059 			continue;
3060 
3061 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3062 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3063 			continue;
3064 
3065 		gaudi2_set_arc_id_cap(hdev, arc_id);
3066 	}
3067 
3068 	/* Fetch ARC scratchpad address */
3069 	hdev->asic_prop.engine_core_interrupt_reg_addr =
3070 		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3071 }
3072 
3073 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3074 {
3075 	u32 reg_base, reg_val;
3076 	int rc;
3077 
3078 	switch (cpu_id) {
3079 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3080 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
3081 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3082 						ARC_DCCM_BLOCK_SIZE * 2, true);
3083 		if (rc)
3084 			return rc;
3085 		break;
3086 	case CPU_ID_SCHED_ARC4:
3087 	case CPU_ID_SCHED_ARC5:
3088 	case CPU_ID_MME_QMAN_ARC0:
3089 	case CPU_ID_MME_QMAN_ARC1:
3090 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
3091 
3092 		/* Scrub lower DCCM block */
3093 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3094 						ARC_DCCM_BLOCK_SIZE, true);
3095 		if (rc)
3096 			return rc;
3097 
3098 		/* Switch to upper DCCM block */
3099 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3100 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3101 
3102 		/* Scrub upper DCCM block */
3103 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3104 						ARC_DCCM_BLOCK_SIZE, true);
3105 		if (rc)
3106 			return rc;
3107 
3108 		/* Switch to lower DCCM block */
3109 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3110 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3111 		break;
3112 	default:
3113 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3114 						ARC_DCCM_BLOCK_SIZE, true);
3115 		if (rc)
3116 			return rc;
3117 	}
3118 
3119 	return 0;
3120 }
3121 
3122 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3123 {
3124 	u16 arc_id;
3125 	int rc;
3126 
3127 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3128 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3129 			continue;
3130 
3131 		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3132 		if (rc)
3133 			return rc;
3134 	}
3135 
3136 	return 0;
3137 }
3138 
3139 static int gaudi2_late_init(struct hl_device *hdev)
3140 {
3141 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3142 	int rc;
3143 
3144 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
3145 
3146 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3147 					gaudi2->virt_msix_db_dma_addr);
3148 	if (rc) {
3149 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3150 		return rc;
3151 	}
3152 
3153 	rc = gaudi2_fetch_psoc_frequency(hdev);
3154 	if (rc) {
3155 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3156 		goto disable_pci_access;
3157 	}
3158 
3159 	gaudi2_init_arcs(hdev);
3160 
3161 	rc = gaudi2_scrub_arcs_dccm(hdev);
3162 	if (rc) {
3163 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3164 		goto disable_pci_access;
3165 	}
3166 
3167 	gaudi2_init_security(hdev);
3168 
3169 	return 0;
3170 
3171 disable_pci_access:
3172 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3173 
3174 	return rc;
3175 }
3176 
/* Late teardown: the only step here is releasing the hwmon resources */
static void gaudi2_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
3181 
/*
 * Fill ten consecutive user-mapped block entries, starting at start_idx, with
 * the decoder command blocks: two per DCORE (DCORE0..DCORE3) followed by the
 * two PCIE decoders. Every entry is HL_BLOCK_SIZE long.
 */
static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
{
	struct user_mapped_block *blocks = gaudi2->mapped_blocks;

	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
	/* last entry - no need to advance the index any further */
	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
}
3197 
3198 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3199 {
3200 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3201 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3202 	u32 block_size, umr_start_idx, num_umr_blocks;
3203 	int i;
3204 
3205 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3206 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3207 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
3208 		else
3209 			block_size = ARC_DCCM_BLOCK_SIZE;
3210 
3211 		blocks[i].address = gaudi2_arc_dccm_bases[i];
3212 		blocks[i].size = block_size;
3213 	}
3214 
3215 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3216 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3217 
3218 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3219 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3220 
3221 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3222 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3223 
3224 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3225 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3226 
3227 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3228 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3229 
3230 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3231 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3232 
3233 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3234 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3235 
3236 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3237 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3238 
3239 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3240 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3241 	for (i = 0 ; i < num_umr_blocks ; i++) {
3242 		u8 nic_id, umr_block_id;
3243 
3244 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3245 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3246 
3247 		blocks[umr_start_idx + i].address =
3248 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3249 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3250 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3251 			umr_block_id * NIC_UMR_OFFSET;
3252 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3253 	}
3254 
3255 	/* Expose decoder HW configuration block to user */
3256 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3257 
3258 	for (i = 1; i < NUM_OF_DCORES; ++i) {
3259 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3260 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3261 
3262 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3263 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3264 
3265 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3266 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3267 	}
3268 }
3269 
3270 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3271 {
3272 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3273 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3274 	int i, j, rc = 0;
3275 
3276 	/* The device ARC works with 32-bits addresses, and because there is a single HW register
3277 	 * that holds the extension bits (49..28), these bits must be identical in all the allocated
3278 	 * range.
3279 	 */
3280 
3281 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3282 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3283 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3284 		if (!virt_addr_arr[i]) {
3285 			rc = -ENOMEM;
3286 			goto free_dma_mem_arr;
3287 		}
3288 
3289 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3290 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3291 			break;
3292 	}
3293 
3294 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3295 		dev_err(hdev->dev,
3296 			"MSB of ARC accessible DMA memory are not identical in all range\n");
3297 		rc = -EFAULT;
3298 		goto free_dma_mem_arr;
3299 	}
3300 
3301 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3302 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3303 
3304 free_dma_mem_arr:
3305 	for (j = 0 ; j < i ; j++)
3306 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3307 						dma_addr_arr[j]);
3308 
3309 	return rc;
3310 }
3311 
3312 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3313 {
3314 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3315 	struct pci_mem_region *region;
3316 
3317 	/* CFG */
3318 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3319 	region->region_base = CFG_BASE;
3320 	region->region_size = CFG_SIZE;
3321 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3322 	region->bar_size = CFG_BAR_SIZE;
3323 	region->bar_id = SRAM_CFG_BAR_ID;
3324 	region->used = 1;
3325 
3326 	/* SRAM */
3327 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3328 	region->region_base = SRAM_BASE_ADDR;
3329 	region->region_size = SRAM_SIZE;
3330 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3331 	region->bar_size = CFG_BAR_SIZE;
3332 	region->bar_id = SRAM_CFG_BAR_ID;
3333 	region->used = 1;
3334 
3335 	/* DRAM */
3336 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3337 	region->region_base = DRAM_PHYS_BASE;
3338 	region->region_size = hdev->asic_prop.dram_size;
3339 	region->offset_in_bar = 0;
3340 	region->bar_size = prop->dram_pci_bar_size;
3341 	region->bar_id = DRAM_BAR_ID;
3342 	region->used = 1;
3343 }
3344 
/*
 * Initialize the interrupt bookkeeping structures: the TPC, unexpected-error,
 * common user CQ and common decoder interrupts, followed by the
 * hdev->user_interrupt[] array (decoder entries first, then user CQ entries).
 */
static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i, j, k;

	/* Initialize TPC interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);

	/* Initialize unexpected error interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
						HL_USR_INTERRUPT_UNEXPECTED);

	/* Initialize common user CQ interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);

	/* Initialize common decoder interrupt */
	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);

	/* User interrupts structure holds both decoder and user interrupts from various engines.
	 * We first initialize the decoder interrupts and then we add the user interrupts.
	 * The only limitation is that the last decoder interrupt id must be smaller
	 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
	 */

	/* Initialize decoder interrupts, expose only normal interrupts,
	 * error interrupts to be handled by driver.
	 * The IRQ number advances by 2 per entry while j advances by 1.
	 */
	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
										i += 2, j++)
		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
						HL_USR_INTERRUPT_DECODER);

	/* j carries on from the decoder loop, so user CQ entries follow the decoder entries */
	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
}
3382 
/* Return a random value from get_random_u32(), substituting 1 if it came out as 0 */
static inline int gaudi2_get_non_zero_random_int(void)
{
	int value = get_random_u32();

	if (!value)
		return 1;

	return value;
}
3389 
3390 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3391 {
3392 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3393 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3394 			&prop->skip_special_blocks_cfg;
3395 
3396 	kfree(prop->special_blocks);
3397 	kfree(skip_special_blocks_cfg->block_types);
3398 	kfree(skip_special_blocks_cfg->block_ranges);
3399 }
3400 
/* Release the special blocks iterator data; simply forwards to gaudi2_special_blocks_free() */
static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
{
	gaudi2_special_blocks_free(hdev);
}
3405 
/*
 * Skip hook installed into the skip-special-blocks configuration.
 * None of the arguments is examined: it always returns false.
 */
static bool gaudi2_special_block_skip(struct hl_device *hdev,
		struct hl_special_blocks_cfg *special_blocks_cfg,
		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
{
	return false;
}
3412 
3413 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3414 {
3415 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3416 	int i, rc;
3417 
3418 	/* Configure Special blocks */
3419 	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3420 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3421 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3422 			sizeof(*prop->special_blocks), GFP_KERNEL);
3423 	if (!prop->special_blocks)
3424 		return -ENOMEM;
3425 
3426 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3427 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3428 				sizeof(*prop->special_blocks));
3429 
3430 	/* Configure when to skip Special blocks */
3431 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3432 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3433 
3434 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3435 		prop->skip_special_blocks_cfg.block_types =
3436 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3437 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3438 		if (!prop->skip_special_blocks_cfg.block_types) {
3439 			rc = -ENOMEM;
3440 			goto free_special_blocks;
3441 		}
3442 
3443 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3444 				sizeof(gaudi2_iterator_skip_block_types));
3445 
3446 		prop->skip_special_blocks_cfg.block_types_len =
3447 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3448 	}
3449 
3450 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3451 		prop->skip_special_blocks_cfg.block_ranges =
3452 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3453 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3454 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3455 			rc = -ENOMEM;
3456 			goto free_skip_special_blocks_types;
3457 		}
3458 
3459 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3460 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3461 					&gaudi2_iterator_skip_block_ranges[i],
3462 					sizeof(struct range));
3463 
3464 		prop->skip_special_blocks_cfg.block_ranges_len =
3465 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3466 	}
3467 
3468 	return 0;
3469 
3470 free_skip_special_blocks_types:
3471 	kfree(prop->skip_special_blocks_cfg.block_types);
3472 free_special_blocks:
3473 	kfree(prop->special_blocks);
3474 
3475 	return rc;
3476 }
3477 
/*
 * Configure the special blocks iterator; simply forwards to
 * gaudi2_special_blocks_config() and returns its result.
 */
static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
{
	return gaudi2_special_blocks_config(hdev);
}
3482 
3483 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3484 {
3485 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3486 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3487 	int i;
3488 
3489 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3490 		/* bail-out if this is an allocation failure point */
3491 		if (!msg_info[i].kern_addr)
3492 			break;
3493 
3494 		hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3495 		msg_info[i].kern_addr = NULL;
3496 	}
3497 }
3498 
3499 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3500 {
3501 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3502 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3503 	int i, rc;
3504 
3505 	/* allocate a message-short buf for each Q we intend to test */
3506 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3507 		msg_info[i].kern_addr =
3508 			(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3509 							GFP_KERNEL, &msg_info[i].dma_addr);
3510 		if (!msg_info[i].kern_addr) {
3511 			dev_err(hdev->dev,
3512 				"Failed to allocate dma memory for H/W queue %d testing\n", i);
3513 			rc = -ENOMEM;
3514 			goto err_exit;
3515 		}
3516 	}
3517 
3518 	return 0;
3519 
3520 err_exit:
3521 	gaudi2_test_queues_msgs_free(hdev);
3522 	return rc;
3523 }
3524 
3525 static int gaudi2_sw_init(struct hl_device *hdev)
3526 {
3527 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3528 	struct gaudi2_device *gaudi2;
3529 	int i, rc;
3530 
3531 	/* Allocate device structure */
3532 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3533 	if (!gaudi2)
3534 		return -ENOMEM;
3535 
3536 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3537 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3538 			continue;
3539 
3540 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3541 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3542 				GAUDI2_EVENT_SIZE);
3543 			rc = -EINVAL;
3544 			goto free_gaudi2_device;
3545 		}
3546 
3547 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3548 	}
3549 
3550 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3551 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3552 
3553 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3554 
3555 	hdev->asic_specific = gaudi2;
3556 
3557 	/* Create DMA pool for small allocations.
3558 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3559 	 * PI/CI registers allocated from this pool have this restriction
3560 	 */
3561 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3562 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3563 	if (!hdev->dma_pool) {
3564 		dev_err(hdev->dev, "failed to create DMA pool\n");
3565 		rc = -ENOMEM;
3566 		goto free_gaudi2_device;
3567 	}
3568 
3569 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3570 	if (rc)
3571 		goto free_dma_pool;
3572 
3573 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3574 	if (!hdev->cpu_accessible_dma_pool) {
3575 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3576 		rc = -ENOMEM;
3577 		goto free_cpu_dma_mem;
3578 	}
3579 
3580 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3581 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3582 	if (rc) {
3583 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3584 		rc = -EFAULT;
3585 		goto free_cpu_accessible_dma_pool;
3586 	}
3587 
3588 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3589 								&gaudi2->virt_msix_db_dma_addr);
3590 	if (!gaudi2->virt_msix_db_cpu_addr) {
3591 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3592 		rc = -ENOMEM;
3593 		goto free_cpu_accessible_dma_pool;
3594 	}
3595 
3596 	spin_lock_init(&gaudi2->hw_queues_lock);
3597 
3598 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3599 							&gaudi2->scratchpad_bus_address,
3600 							GFP_KERNEL | __GFP_ZERO);
3601 	if (!gaudi2->scratchpad_kernel_address) {
3602 		rc = -ENOMEM;
3603 		goto free_virt_msix_db_mem;
3604 	}
3605 
3606 	gaudi2_user_mapped_blocks_init(hdev);
3607 
3608 	/* Initialize user interrupts */
3609 	gaudi2_user_interrupt_setup(hdev);
3610 
3611 	hdev->supports_coresight = true;
3612 	hdev->supports_sync_stream = true;
3613 	hdev->supports_cb_mapping = true;
3614 	hdev->supports_wait_for_multi_cs = false;
3615 
3616 	prop->supports_compute_reset = true;
3617 
3618 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3619 
3620 	rc = gaudi2_special_blocks_iterator_config(hdev);
3621 	if (rc)
3622 		goto free_scratchpad_mem;
3623 
3624 	rc = gaudi2_test_queues_msgs_alloc(hdev);
3625 	if (rc)
3626 		goto special_blocks_free;
3627 
3628 	return 0;
3629 
3630 special_blocks_free:
3631 	gaudi2_special_blocks_iterator_free(hdev);
3632 free_scratchpad_mem:
3633 	hl_asic_dma_pool_free(hdev, gaudi2->scratchpad_kernel_address,
3634 				gaudi2->scratchpad_bus_address);
3635 free_virt_msix_db_mem:
3636 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3637 free_cpu_accessible_dma_pool:
3638 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3639 free_cpu_dma_mem:
3640 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3641 					hdev->cpu_accessible_dma_address);
3642 free_dma_pool:
3643 	dma_pool_destroy(hdev->dma_pool);
3644 free_gaudi2_device:
3645 	kfree(gaudi2);
3646 	return rc;
3647 }
3648 
/*
 * Release everything gaudi2_sw_init() set up: the queue-test message buffers,
 * the special blocks data, the virtual MSI-X doorbell buffer, the CPU
 * accessible gen-pool and its backing DMA memory, the scratchpad buffer, the
 * DMA pool and finally the gaudi2 device structure. Always returns 0.
 */
static int gaudi2_sw_fini(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	/* hands the message buffers back to the DMA pool destroyed further below */
	gaudi2_test_queues_msgs_free(hdev);

	gaudi2_special_blocks_iterator_free(hdev);

	/* the doorbell buffer is returned to the gen-pool before the pool is destroyed */
	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
						hdev->cpu_accessible_dma_address);

	/* scratchpad buffer of PAGE_SIZE bytes */
	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
					gaudi2->scratchpad_bus_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi2);

	return 0;
}
3674 
3675 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3676 {
3677 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3678 						QM_GLBL_CFG1_CQF_STOP |
3679 						QM_GLBL_CFG1_CP_STOP);
3680 
3681 	/* stop also the ARC */
3682 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3683 }
3684 
3685 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3686 {
3687 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3688 						QM_GLBL_CFG1_CQF_FLUSH |
3689 						QM_GLBL_CFG1_CP_FLUSH);
3690 }
3691 
3692 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3693 {
3694 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3695 }
3696 
3697 /**
3698  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3699  *
3700  * @hdev: pointer to the habanalabs device structure
3701  * @queue_id: queue to clear fence counters to
3702  * @skip_fence: if true set maximum fence value to all fence counters to avoid
3703  *              getting stuck on any fence value. otherwise set all fence
3704  *              counters to 0 (standard clear of fence counters)
3705  */
3706 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3707 						bool skip_fence)
3708 {
3709 	u32 size, reg_base;
3710 	u32 addr, val;
3711 
3712 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3713 
3714 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3715 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3716 
3717 	/*
3718 	 * in case we want to make sure that QM that is stuck on a fence will
3719 	 * be released we should set the fence counter to a higher value that
3720 	 * the value the QM waiting for. to comply with any fence counter of
3721 	 * any value we set maximum fence value to all counters
3722 	 */
3723 	val = skip_fence ? U32_MAX : 0;
3724 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3725 }
3726 
3727 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3728 {
3729 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3730 
3731 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3732 	gaudi2_flush_qman_common(hdev, reg_base);
3733 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3734 }
3735 
3736 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3737 {
3738 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3739 	int dcore, inst;
3740 
3741 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3742 		goto stop_edma_qmans;
3743 
3744 	/* Stop CPs of PDMA QMANs */
3745 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3746 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3747 
3748 stop_edma_qmans:
3749 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3750 		return;
3751 
3752 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3753 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3754 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3755 			u32 qm_base;
3756 
3757 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3758 				continue;
3759 
3760 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3761 					inst * DCORE_EDMA_OFFSET;
3762 
3763 			/* Stop CPs of EDMA QMANs */
3764 			gaudi2_stop_qman_common(hdev, qm_base);
3765 		}
3766 	}
3767 }
3768 
3769 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3770 {
3771 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3772 	u32 offset, i;
3773 
3774 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3775 
3776 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3777 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3778 			continue;
3779 
3780 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3781 	}
3782 }
3783 
3784 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3785 {
3786 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3787 	u32 reg_base;
3788 	int i;
3789 
3790 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3791 		return;
3792 
3793 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3794 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3795 			continue;
3796 
3797 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3798 		gaudi2_stop_qman_common(hdev, reg_base);
3799 	}
3800 }
3801 
3802 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3803 {
3804 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3805 	u32 reg_base;
3806 	int i;
3807 
3808 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3809 		return;
3810 
3811 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3812 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3813 			continue;
3814 
3815 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3816 		gaudi2_stop_qman_common(hdev, reg_base);
3817 	}
3818 }
3819 
3820 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3821 {
3822 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3823 	u32 reg_base, queue_id;
3824 	int i;
3825 
3826 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3827 		return;
3828 
3829 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3830 
3831 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3832 		if (!(hdev->nic_ports_mask & BIT(i)))
3833 			continue;
3834 
3835 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3836 		gaudi2_stop_qman_common(hdev, reg_base);
3837 	}
3838 }
3839 
3840 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3841 {
3842 	u32 reg_val;
3843 
3844 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3845 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3846 }
3847 
3848 static void gaudi2_dma_stall(struct hl_device *hdev)
3849 {
3850 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3851 	int dcore, inst;
3852 
3853 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3854 		goto stall_edma;
3855 
3856 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3857 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3858 
3859 stall_edma:
3860 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3861 		return;
3862 
3863 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3864 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3865 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3866 			u32 core_base;
3867 
3868 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3869 				continue;
3870 
3871 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3872 					inst * DCORE_EDMA_OFFSET;
3873 
3874 			/* Stall CPs of EDMA QMANs */
3875 			gaudi2_stall_dma_common(hdev, core_base);
3876 		}
3877 	}
3878 }
3879 
3880 static void gaudi2_mme_stall(struct hl_device *hdev)
3881 {
3882 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3883 	u32 offset, i;
3884 
3885 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3886 
3887 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3888 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3889 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3890 }
3891 
3892 static void gaudi2_tpc_stall(struct hl_device *hdev)
3893 {
3894 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3895 	u32 reg_base;
3896 	int i;
3897 
3898 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3899 		return;
3900 
3901 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3902 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3903 			continue;
3904 
3905 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3906 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3907 	}
3908 }
3909 
3910 static void gaudi2_rotator_stall(struct hl_device *hdev)
3911 {
3912 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3913 	u32 reg_val;
3914 	int i;
3915 
3916 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3917 		return;
3918 
3919 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3920 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3921 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3922 
3923 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3924 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3925 			continue;
3926 
3927 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3928 	}
3929 }
3930 
3931 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3932 {
3933 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3934 }
3935 
3936 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3937 {
3938 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3939 	int dcore, inst;
3940 
3941 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3942 		goto stop_edma_qmans;
3943 
3944 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3945 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3946 
3947 stop_edma_qmans:
3948 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3949 		return;
3950 
3951 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3952 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3953 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3954 			u32 qm_base;
3955 
3956 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3957 				continue;
3958 
3959 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3960 					inst * DCORE_EDMA_OFFSET;
3961 
3962 			/* Disable CPs of EDMA QMANs */
3963 			gaudi2_disable_qman_common(hdev, qm_base);
3964 		}
3965 	}
3966 }
3967 
3968 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3969 {
3970 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3971 	u32 offset, i;
3972 
3973 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3974 
3975 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3976 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3977 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3978 }
3979 
3980 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3981 {
3982 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3983 	u32 reg_base;
3984 	int i;
3985 
3986 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3987 		return;
3988 
3989 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3990 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3991 			continue;
3992 
3993 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3994 		gaudi2_disable_qman_common(hdev, reg_base);
3995 	}
3996 }
3997 
3998 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
3999 {
4000 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4001 	u32 reg_base;
4002 	int i;
4003 
4004 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4005 		return;
4006 
4007 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4008 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4009 			continue;
4010 
4011 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4012 		gaudi2_disable_qman_common(hdev, reg_base);
4013 	}
4014 }
4015 
4016 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4017 {
4018 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4019 	u32 reg_base, queue_id;
4020 	int i;
4021 
4022 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4023 		return;
4024 
4025 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4026 
4027 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4028 		if (!(hdev->nic_ports_mask & BIT(i)))
4029 			continue;
4030 
4031 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4032 		gaudi2_disable_qman_common(hdev, reg_base);
4033 	}
4034 }
4035 
4036 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4037 {
4038 	/* Disable the timestamp counter */
4039 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4040 
4041 	/* Zero the lower/upper parts of the 64-bit counter */
4042 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4043 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4044 
4045 	/* Enable the counter */
4046 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4047 }
4048 
4049 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4050 {
4051 	/* Disable the timestamp counter */
4052 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4053 }
4054 
4055 static const char *gaudi2_irq_name(u16 irq_number)
4056 {
4057 	switch (irq_number) {
4058 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4059 		return "gaudi2 cpu eq";
4060 	case GAUDI2_IRQ_NUM_COMPLETION:
4061 		return "gaudi2 completion";
4062 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4063 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4064 	case GAUDI2_IRQ_NUM_TPC_ASSERT:
4065 		return "gaudi2 tpc assert";
4066 	case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4067 		return "gaudi2 unexpected error";
4068 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4069 		return "gaudi2 user completion";
4070 	default:
4071 		return "invalid";
4072 	}
4073 }
4074 
4075 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4076 {
4077 	int i, irq, relative_idx;
4078 	struct hl_dec *dec;
4079 
4080 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4081 		irq = pci_irq_vector(hdev->pdev, i);
4082 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4083 
4084 		dec = hdev->dec + relative_idx / 2;
4085 
4086 		/* We pass different structures depending on the irq handler. For the abnormal
4087 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4088 		 * user_interrupt entry
4089 		 */
4090 		free_irq(irq, ((relative_idx % 2) ?
4091 				(void *) dec :
4092 				(void *) &hdev->user_interrupt[dec->core_id]));
4093 	}
4094 }
4095 
4096 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4097 {
4098 	int rc, i, irq_init_cnt, irq, relative_idx;
4099 	struct hl_dec *dec;
4100 
4101 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4102 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4103 			i++, irq_init_cnt++) {
4104 
4105 		irq = pci_irq_vector(hdev->pdev, i);
4106 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4107 
4108 		/* We pass different structures depending on the irq handler. For the abnormal
4109 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4110 		 * user_interrupt entry
4111 		 *
4112 		 * TODO: change the dec abnrm to threaded irq
4113 		 */
4114 
4115 		dec = hdev->dec + relative_idx / 2;
4116 		if (relative_idx % 2) {
4117 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4118 						gaudi2_irq_name(i), (void *) dec);
4119 		} else {
4120 			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4121 					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4122 					gaudi2_irq_name(i),
4123 					(void *) &hdev->user_interrupt[dec->core_id]);
4124 		}
4125 
4126 		if (rc) {
4127 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4128 			goto free_dec_irqs;
4129 		}
4130 	}
4131 
4132 	return 0;
4133 
4134 free_dec_irqs:
4135 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4136 	return rc;
4137 }
4138 
4139 static int gaudi2_enable_msix(struct hl_device *hdev)
4140 {
4141 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4142 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4143 	int rc, irq, i, j, user_irq_init_cnt;
4144 	struct hl_cq *cq;
4145 
4146 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4147 		return 0;
4148 
4149 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4150 					PCI_IRQ_MSIX);
4151 	if (rc < 0) {
4152 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4153 			GAUDI2_MSIX_ENTRIES, rc);
4154 		return rc;
4155 	}
4156 
4157 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4158 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4159 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4160 	if (rc) {
4161 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4162 		goto free_irq_vectors;
4163 	}
4164 
4165 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4166 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4167 			&hdev->event_queue);
4168 	if (rc) {
4169 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4170 		goto free_completion_irq;
4171 	}
4172 
4173 	rc = gaudi2_dec_enable_msix(hdev);
4174 	if (rc) {
4175 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
4176 		goto free_event_irq;
4177 	}
4178 
4179 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4180 	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4181 			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4182 			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4183 	if (rc) {
4184 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4185 		goto free_dec_irq;
4186 	}
4187 
4188 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4189 	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4190 			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4191 					&hdev->unexpected_error_interrupt);
4192 	if (rc) {
4193 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4194 		goto free_tpc_irq;
4195 	}
4196 
4197 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4198 			user_irq_init_cnt < prop->user_interrupt_count;
4199 			i++, j++, user_irq_init_cnt++) {
4200 
4201 		irq = pci_irq_vector(hdev->pdev, i);
4202 		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4203 						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4204 						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4205 
4206 		if (rc) {
4207 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4208 			goto free_user_irq;
4209 		}
4210 	}
4211 
4212 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4213 
4214 	return 0;
4215 
4216 free_user_irq:
4217 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4218 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4219 
4220 		irq = pci_irq_vector(hdev->pdev, i);
4221 		free_irq(irq, &hdev->user_interrupt[j]);
4222 	}
4223 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4224 	free_irq(irq, &hdev->unexpected_error_interrupt);
4225 free_tpc_irq:
4226 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4227 	free_irq(irq, &hdev->tpc_interrupt);
4228 free_dec_irq:
4229 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4230 free_event_irq:
4231 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4232 	free_irq(irq, cq);
4233 
4234 free_completion_irq:
4235 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4236 	free_irq(irq, cq);
4237 
4238 free_irq_vectors:
4239 	pci_free_irq_vectors(hdev->pdev);
4240 
4241 	return rc;
4242 }
4243 
4244 static void gaudi2_sync_irqs(struct hl_device *hdev)
4245 {
4246 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4247 	int i, j;
4248 	int irq;
4249 
4250 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4251 		return;
4252 
4253 	/* Wait for all pending IRQs to be finished */
4254 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4255 
4256 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4257 		irq = pci_irq_vector(hdev->pdev, i);
4258 		synchronize_irq(irq);
4259 	}
4260 
4261 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4262 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4263 
4264 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4265 										i++, j++) {
4266 		irq = pci_irq_vector(hdev->pdev, i);
4267 		synchronize_irq(irq);
4268 	}
4269 
4270 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4271 }
4272 
4273 static void gaudi2_disable_msix(struct hl_device *hdev)
4274 {
4275 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4276 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4277 	struct hl_cq *cq;
4278 	int irq, i, j, k;
4279 
4280 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4281 		return;
4282 
4283 	gaudi2_sync_irqs(hdev);
4284 
4285 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4286 	free_irq(irq, &hdev->event_queue);
4287 
4288 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4289 
4290 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4291 	free_irq(irq, &hdev->tpc_interrupt);
4292 
4293 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4294 	free_irq(irq, &hdev->unexpected_error_interrupt);
4295 
4296 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4297 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4298 
4299 		irq = pci_irq_vector(hdev->pdev, i);
4300 		free_irq(irq, &hdev->user_interrupt[j]);
4301 	}
4302 
4303 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4304 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4305 	free_irq(irq, cq);
4306 
4307 	pci_free_irq_vectors(hdev->pdev);
4308 
4309 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4310 }
4311 
4312 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4313 {
4314 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4315 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4316 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4317 	int rc;
4318 
4319 	if (hdev->pldm)
4320 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4321 	else
4322 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4323 
4324 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4325 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4326 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4327 			continue;
4328 
4329 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4330 
4331 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4332 
4333 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4334 
4335 		/* Wait till all traffic from decoder stops
4336 		 * before apply core reset.
4337 		 */
4338 		rc = hl_poll_timeout(
4339 				hdev,
4340 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4341 				graceful,
4342 				(graceful & graceful_pend_mask),
4343 				100,
4344 				timeout_usec);
4345 		if (rc)
4346 			dev_err(hdev->dev,
4347 				"Failed to stop traffic from DCORE%d Decoder %d\n",
4348 				dcore_id, dec_id);
4349 	}
4350 }
4351 
4352 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4353 {
4354 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4355 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4356 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4357 	int rc;
4358 
4359 	if (hdev->pldm)
4360 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4361 	else
4362 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4363 
4364 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4365 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4366 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4367 			continue;
4368 
4369 		offset = dec_id * PCIE_VDEC_OFFSET;
4370 
4371 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4372 
4373 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4374 
4375 		/* Wait till all traffic from decoder stops
4376 		 * before apply core reset.
4377 		 */
4378 		rc = hl_poll_timeout(
4379 				hdev,
4380 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4381 				graceful,
4382 				(graceful & graceful_pend_mask),
4383 				100,
4384 				timeout_usec);
4385 		if (rc)
4386 			dev_err(hdev->dev,
4387 				"Failed to stop traffic from PCIe Decoder %d\n",
4388 				dec_id);
4389 	}
4390 }
4391 
4392 static void gaudi2_stop_dec(struct hl_device *hdev)
4393 {
4394 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4395 	int dcore_id;
4396 
4397 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4398 		return;
4399 
4400 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4401 		gaudi2_stop_dcore_dec(hdev, dcore_id);
4402 
4403 	gaudi2_stop_pcie_dec(hdev);
4404 }
4405 
4406 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4407 {
4408 	u32 reg_base, reg_val;
4409 
4410 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4411 	if (run_mode == HL_ENGINE_CORE_RUN)
4412 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4413 	else
4414 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4415 
4416 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4417 }
4418 
4419 static void gaudi2_halt_arcs(struct hl_device *hdev)
4420 {
4421 	u16 arc_id;
4422 
4423 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4424 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4425 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4426 	}
4427 }
4428 
4429 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4430 {
4431 	int rc;
4432 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
4433 
4434 	if (hdev->pldm)
4435 		timeout_usec *= 100;
4436 
4437 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4438 	if (run_mode == HL_ENGINE_CORE_RUN)
4439 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4440 	else
4441 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4442 
4443 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4444 				val, ((val & ack_mask) == ack_mask),
4445 				1000, timeout_usec);
4446 
4447 	if (!rc) {
4448 		/* Clear */
4449 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4450 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4451 	}
4452 
4453 	return rc;
4454 }
4455 
4456 static void gaudi2_reset_arcs(struct hl_device *hdev)
4457 {
4458 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4459 	u16 arc_id;
4460 
4461 	if (!gaudi2)
4462 		return;
4463 
4464 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4465 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4466 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4467 }
4468 
4469 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4470 {
4471 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4472 	u32 queue_id;
4473 	int i;
4474 
4475 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4476 		return;
4477 
4478 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4479 
4480 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4481 		if (!(hdev->nic_ports_mask & BIT(i)))
4482 			continue;
4483 
4484 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4485 	}
4486 }
4487 
4488 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4489 					u32 num_cores, u32 core_command)
4490 {
4491 	int i, rc;
4492 
4493 	for (i = 0 ; i < num_cores ; i++) {
4494 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4495 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4496 	}
4497 
4498 	for (i = 0 ; i < num_cores ; i++) {
4499 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4500 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4501 
4502 			if (rc) {
4503 				dev_err(hdev->dev, "failed to %s arc: %d\n",
4504 					(core_command == HL_ENGINE_CORE_HALT) ?
4505 					"HALT" : "RUN", core_ids[i]);
4506 				return -1;
4507 			}
4508 		}
4509 	}
4510 
4511 	return 0;
4512 }
4513 
4514 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4515 {
4516 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4517 	u32 reg_base, reg_addr, reg_val, tpc_id;
4518 
4519 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4520 		return 0;
4521 
4522 	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4523 	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4524 		return 0;
4525 
4526 	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4527 	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4528 	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4529 			!!(engine_command == HL_ENGINE_STALL));
4530 	WREG32(reg_addr, reg_val);
4531 
4532 	if (engine_command == HL_ENGINE_RESUME) {
4533 		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4534 		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4535 		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4536 	}
4537 
4538 	return 0;
4539 }
4540 
4541 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4542 {
4543 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4544 	u32 reg_base, reg_addr, reg_val, mme_id;
4545 
4546 	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4547 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4548 		return 0;
4549 
4550 	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4551 	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4552 	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4553 			!!(engine_command == HL_ENGINE_STALL));
4554 	WREG32(reg_addr, reg_val);
4555 
4556 	return 0;
4557 }
4558 
4559 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4560 {
4561 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4562 	u32 reg_base, reg_addr, reg_val, edma_id;
4563 
4564 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4565 		return 0;
4566 
4567 	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4568 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4569 		return 0;
4570 
4571 	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4572 	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4573 	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4574 			!!(engine_command == HL_ENGINE_STALL));
4575 	WREG32(reg_addr, reg_val);
4576 
4577 	if (engine_command == HL_ENGINE_STALL) {
4578 		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4579 				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4580 		WREG32(reg_addr, reg_val);
4581 	}
4582 
4583 	return 0;
4584 }
4585 
4586 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4587 		u32 *engine_ids, u32 num_engines, u32 engine_command)
4588 {
4589 	int i, rc;
4590 
4591 	for (i = 0 ; i < num_engines ; ++i) {
4592 		switch (engine_ids[i]) {
4593 		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4594 		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4595 		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4596 		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4597 			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4598 			if (rc)
4599 				return rc;
4600 
4601 			break;
4602 		case GAUDI2_DCORE0_ENGINE_ID_MME:
4603 		case GAUDI2_DCORE1_ENGINE_ID_MME:
4604 		case GAUDI2_DCORE2_ENGINE_ID_MME:
4605 		case GAUDI2_DCORE3_ENGINE_ID_MME:
4606 			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4607 			if (rc)
4608 				return rc;
4609 
4610 			break;
4611 		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4612 		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4613 		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4614 		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4615 			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4616 			if (rc)
4617 				return rc;
4618 
4619 			break;
4620 		default:
4621 			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4622 			return -EINVAL;
4623 		}
4624 	}
4625 
4626 	return 0;
4627 }
4628 
4629 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4630 					u32 num_engines, u32 engine_command)
4631 {
4632 	switch (engine_command) {
4633 	case HL_ENGINE_CORE_HALT:
4634 	case HL_ENGINE_CORE_RUN:
4635 		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4636 
4637 	case HL_ENGINE_STALL:
4638 	case HL_ENGINE_RESUME:
4639 		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4640 
4641 	default:
4642 		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4643 		return -EINVAL;
4644 	}
4645 }
4646 
4647 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4648 {
4649 	u32 wait_timeout_ms;
4650 
4651 	if (hdev->pldm)
4652 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4653 	else
4654 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4655 
4656 	if (fw_reset)
4657 		goto skip_engines;
4658 
4659 	gaudi2_stop_dma_qmans(hdev);
4660 	gaudi2_stop_mme_qmans(hdev);
4661 	gaudi2_stop_tpc_qmans(hdev);
4662 	gaudi2_stop_rot_qmans(hdev);
4663 	gaudi2_stop_nic_qmans(hdev);
4664 	msleep(wait_timeout_ms);
4665 
4666 	gaudi2_halt_arcs(hdev);
4667 	gaudi2_dma_stall(hdev);
4668 	gaudi2_mme_stall(hdev);
4669 	gaudi2_tpc_stall(hdev);
4670 	gaudi2_rotator_stall(hdev);
4671 
4672 	msleep(wait_timeout_ms);
4673 
4674 	gaudi2_stop_dec(hdev);
4675 
4676 	/*
4677 	 * in case of soft reset do a manual flush for QMANs (currently called
4678 	 * only for NIC QMANs
4679 	 */
4680 	if (!hard_reset)
4681 		gaudi2_nic_qmans_manual_flush(hdev);
4682 
4683 	gaudi2_disable_dma_qmans(hdev);
4684 	gaudi2_disable_mme_qmans(hdev);
4685 	gaudi2_disable_tpc_qmans(hdev);
4686 	gaudi2_disable_rot_qmans(hdev);
4687 	gaudi2_disable_nic_qmans(hdev);
4688 	gaudi2_disable_timestamp(hdev);
4689 
4690 skip_engines:
4691 	if (hard_reset) {
4692 		gaudi2_disable_msix(hdev);
4693 		return;
4694 	}
4695 
4696 	gaudi2_sync_irqs(hdev);
4697 }
4698 
4699 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4700 {
4701 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4702 
4703 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4704 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4705 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4706 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4707 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4708 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4709 }
4710 
4711 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4712 {
4713 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4714 	struct dynamic_fw_load_mgr *dynamic_loader;
4715 	struct cpu_dyn_regs *dyn_regs;
4716 
4717 	/* fill common fields */
4718 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4719 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4720 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4721 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4722 	fw_loader->skip_bmc = false;
4723 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4724 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4725 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4726 
4727 	/* here we update initial values for few specific dynamic regs (as
4728 	 * before reading the first descriptor from FW those value has to be
4729 	 * hard-coded). in later stages of the protocol those values will be
4730 	 * updated automatically by reading the FW descriptor so data there
4731 	 * will always be up-to-date
4732 	 */
4733 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4734 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4735 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4736 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4737 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4738 }
4739 
4740 static int gaudi2_init_cpu(struct hl_device *hdev)
4741 {
4742 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4743 	int rc;
4744 
4745 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4746 		return 0;
4747 
4748 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4749 		return 0;
4750 
4751 	rc = hl_fw_init_cpu(hdev);
4752 	if (rc)
4753 		return rc;
4754 
4755 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4756 
4757 	return 0;
4758 }
4759 
4760 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4761 {
4762 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4763 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4764 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4765 	struct cpu_dyn_regs *dyn_regs;
4766 	struct hl_eq *eq;
4767 	u32 status;
4768 	int err;
4769 
4770 	if (!hdev->cpu_queues_enable)
4771 		return 0;
4772 
4773 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4774 		return 0;
4775 
4776 	eq = &hdev->event_queue;
4777 
4778 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4779 
4780 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4781 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4782 
4783 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4784 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4785 
4786 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4787 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4788 
4789 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4790 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4791 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4792 
4793 	/* Used for EQ CI */
4794 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4795 
4796 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4797 
4798 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4799 
4800 	/* Let the ARC know we are ready as it is now handling those queues  */
4801 
4802 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4803 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4804 
4805 	err = hl_poll_timeout(
4806 		hdev,
4807 		mmCPU_IF_QUEUE_INIT,
4808 		status,
4809 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4810 		1000,
4811 		cpu_timeout);
4812 
4813 	if (err) {
4814 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4815 		return -EIO;
4816 	}
4817 
4818 	/* update FW application security bits */
4819 	if (prop->fw_cpu_boot_dev_sts0_valid)
4820 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4821 
4822 	if (prop->fw_cpu_boot_dev_sts1_valid)
4823 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4824 
4825 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4826 	return 0;
4827 }
4828 
4829 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4830 				u32 queue_id_base)
4831 {
4832 	struct hl_hw_queue *q;
4833 	u32 pq_id, pq_offset;
4834 
4835 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4836 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4837 		pq_offset = pq_id * 4;
4838 
4839 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4840 				lower_32_bits(q->bus_address));
4841 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4842 				upper_32_bits(q->bus_address));
4843 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4844 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4845 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4846 	}
4847 }
4848 
4849 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4850 {
4851 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4852 
4853 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4854 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4855 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4856 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4857 
4858 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4859 		cp_offset = cp_id * 4;
4860 
4861 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4862 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset,	mtr_base_hi);
4863 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset,	so_base_lo);
4864 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset,	so_base_hi);
4865 	}
4866 
4867 	/* allow QMANs to accept work from ARC CQF */
4868 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4869 }
4870 
4871 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4872 				u32 queue_id_base)
4873 {
4874 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4875 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4876 
4877 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4878 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4879 
4880 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4881 		pq_offset = pq_id * 4;
4882 
4883 		/* Configure QMAN HBW to scratchpad as it is not needed */
4884 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4885 				lower_32_bits(gaudi2->scratchpad_bus_address));
4886 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4887 				upper_32_bits(gaudi2->scratchpad_bus_address));
4888 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4889 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4890 
4891 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4892 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4893 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4894 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4895 	}
4896 
4897 	/* Enable QMAN H/W completion */
4898 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4899 }
4900 
4901 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4902 {
4903 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4904 	u32 sp_reg_addr;
4905 
4906 	switch (queue_id_base) {
4907 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4908 		fallthrough;
4909 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4910 		fallthrough;
4911 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4912 		fallthrough;
4913 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4914 		fallthrough;
4915 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4916 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4917 		break;
4918 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4919 		fallthrough;
4920 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4921 		fallthrough;
4922 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4923 		fallthrough;
4924 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4925 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4926 		break;
4927 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4928 		fallthrough;
4929 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4930 		fallthrough;
4931 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4932 		fallthrough;
4933 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4934 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4935 		break;
4936 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4937 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4938 		break;
4939 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4940 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4941 		break;
4942 	default:
4943 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4944 		return 0;
4945 	}
4946 
4947 	return sp_reg_addr;
4948 }
4949 
4950 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4951 					u32 queue_id_base)
4952 {
4953 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4954 	int map_table_entry;
4955 
4956 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4957 
4958 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4959 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4960 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4961 
4962 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4963 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4964 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4965 
4966 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4967 
4968 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4969 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4970 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4971 
4972 	/* Enable the QMAN channel.
4973 	 * PDMA QMAN configuration is different, as we do not allow user to
4974 	 * access some of the CPs.
4975 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4976 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4977 	 */
4978 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4979 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4980 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4981 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4982 	else
4983 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4984 }
4985 
4986 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4987 		u32 queue_id_base)
4988 {
4989 	u32 pq_id;
4990 
4991 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4992 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4993 
4994 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4995 	gaudi2_init_qman_cp(hdev, reg_base);
4996 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4997 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4998 }
4999 
5000 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5001 				u32 dma_core_id, bool is_secure)
5002 {
5003 	u32 prot, irq_handler_offset;
5004 	struct cpu_dyn_regs *dyn_regs;
5005 	int map_table_entry;
5006 
5007 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5008 	if (is_secure)
5009 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5010 
5011 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5012 
5013 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5014 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5015 
5016 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5017 			lower_32_bits(CFG_BASE + irq_handler_offset));
5018 
5019 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5020 			upper_32_bits(CFG_BASE + irq_handler_offset));
5021 
5022 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5023 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5024 		gaudi2_irq_map_table[map_table_entry].cpu_id);
5025 
5026 	/* Enable the DMA channel */
5027 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5028 }
5029 
5030 static void gaudi2_init_kdma(struct hl_device *hdev)
5031 {
5032 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5033 	u32 reg_base;
5034 
5035 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5036 		return;
5037 
5038 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5039 
5040 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5041 
5042 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5043 }
5044 
5045 static void gaudi2_init_pdma(struct hl_device *hdev)
5046 {
5047 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5048 	u32 reg_base;
5049 
5050 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5051 		return;
5052 
5053 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5054 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5055 
5056 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5057 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5058 
5059 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5060 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5061 
5062 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5063 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5064 
5065 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5066 }
5067 
5068 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5069 {
5070 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
5071 
5072 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5073 	base_edma_qman_id = edma_stream_base[seq];
5074 
5075 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5076 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5077 
5078 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5079 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5080 }
5081 
5082 static void gaudi2_init_edma(struct hl_device *hdev)
5083 {
5084 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5085 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5086 	int dcore, inst;
5087 
5088 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5089 		return;
5090 
5091 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5092 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5093 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5094 
5095 			if (!(prop->edma_enabled_mask & BIT(seq)))
5096 				continue;
5097 
5098 			gaudi2_init_edma_instance(hdev, seq);
5099 
5100 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5101 		}
5102 	}
5103 }
5104 
5105 /*
5106  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5107  * @hdev: pointer to habanalabs device structure.
5108  * @sob_id: sync object ID.
5109  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5110  * @interrupt_id: interrupt ID.
5111  *
5112  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5113  * write directly to the HBW host memory of the virtual MSI-X doorbell.
5114  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5115  *
5116  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5117  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5118  * completion, by decrementing the sync object value and re-arming the monitor.
5119  */
5120 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5121 							u32 first_mon_id, u32 interrupt_id)
5122 {
5123 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5124 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5125 	u64 addr;
5126 	u8 mask;
5127 
5128 	/* Reset the SOB value */
5129 	sob_offset = sob_id * sizeof(u32);
5130 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5131 
5132 	/* Configure 3 monitors:
5133 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5134 	 * 2. Decrement SOB value by 1.
5135 	 * 3. Re-arm the master monitor.
5136 	 */
5137 
5138 	first_mon_offset = first_mon_id * sizeof(u32);
5139 
5140 	/* 2nd monitor: Decrement SOB value by 1 */
5141 	mon_offset = first_mon_offset + sizeof(u32);
5142 
5143 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5144 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5145 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5146 
5147 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5148 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5149 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5150 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5151 
5152 	/* 3rd monitor: Re-arm the master monitor */
5153 	mon_offset = first_mon_offset + 2 * sizeof(u32);
5154 
5155 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5156 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5157 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5158 
5159 	sob_group = sob_id / 8;
5160 	mask = ~BIT(sob_id & 0x7);
5161 	mode = 0; /* comparison mode is "greater than or equal to" */
5162 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5163 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5164 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5165 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5166 
5167 	payload = arm;
5168 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5169 
5170 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5171 	mon_offset = first_mon_offset;
5172 
5173 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5174 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5175 
5176 	addr = gaudi2->virt_msix_db_dma_addr;
5177 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5178 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5179 
5180 	payload = interrupt_id;
5181 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5182 
5183 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5184 }
5185 
5186 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5187 {
5188 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5189 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5190 
5191 	/* Decoder normal/abnormal interrupts */
5192 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5193 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5194 			continue;
5195 
5196 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5197 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5198 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5199 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5200 
5201 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5202 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5203 		interrupt_id += 1;
5204 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5205 	}
5206 }
5207 
5208 static void gaudi2_init_sm(struct hl_device *hdev)
5209 {
5210 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5211 	u64 cq_address;
5212 	u32 reg_val;
5213 	int i;
5214 
5215 	/* Enable HBW/LBW CQ for completion monitors */
5216 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5217 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5218 
5219 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5220 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5221 
5222 	/* Enable only HBW CQ for KDMA completion monitor */
5223 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5224 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5225 
5226 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5227 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5228 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5229 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5230 
5231 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5232 		cq_address =
5233 			hdev->completion_queue[i].bus_address;
5234 
5235 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5236 							lower_32_bits(cq_address));
5237 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5238 							upper_32_bits(cq_address));
5239 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5240 							ilog2(HL_CQ_SIZE_IN_BYTES));
5241 	}
5242 
5243 	/* Configure kernel ASID and MMU BP*/
5244 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5245 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5246 
5247 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5248 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
5249 }
5250 
5251 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5252 {
5253 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5254 	u32 reg_val;
5255 	int i;
5256 
5257 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5258 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5259 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5260 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5261 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5262 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5263 
5264 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5265 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5266 
5267 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5268 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5269 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5270 	}
5271 }
5272 
5273 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5274 							bool config_qman_only)
5275 {
5276 	u32 queue_id_base, reg_base;
5277 
5278 	switch (dcore_id) {
5279 	case 0:
5280 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5281 		break;
5282 	case 1:
5283 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5284 		break;
5285 	case 2:
5286 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5287 		break;
5288 	case 3:
5289 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5290 		break;
5291 	default:
5292 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5293 		return;
5294 	}
5295 
5296 	if (!config_qman_only) {
5297 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5298 		gaudi2_init_mme_acc(hdev, reg_base);
5299 	}
5300 
5301 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5302 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
5303 }
5304 
5305 static void gaudi2_init_mme(struct hl_device *hdev)
5306 {
5307 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5308 	int i;
5309 
5310 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5311 		return;
5312 
5313 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5314 		gaudi2_init_dcore_mme(hdev, i, false);
5315 
5316 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5317 	}
5318 }
5319 
5320 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5321 {
5322 	/* Mask arithmetic and QM interrupts in TPC */
5323 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5324 
5325 	/* Set 16 cache lines */
5326 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5327 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5328 }
5329 
/*
 * struct gaudi2_tpc_init_cfg_data - data passed to the TPC init iteration callback.
 * @dcore_tpc_qid_base: per dcore, the queue ID of the first TPC queue in that dcore.
 */
struct gaudi2_tpc_init_cfg_data {
	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
};
5333 
5334 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5335 					u32 offset, struct iterate_module_ctx *ctx)
5336 {
5337 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5338 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5339 	u32 queue_id_base;
5340 	u8 seq;
5341 
5342 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5343 
5344 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5345 		/* gets last sequence number */
5346 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5347 	else
5348 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5349 
5350 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5351 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5352 
5353 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5354 }
5355 
5356 static void gaudi2_init_tpc(struct hl_device *hdev)
5357 {
5358 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5359 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5360 	struct iterate_module_ctx tpc_iter;
5361 
5362 	if (!hdev->asic_prop.tpc_enabled_mask)
5363 		return;
5364 
5365 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5366 		return;
5367 
5368 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5369 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5370 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5371 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5372 	tpc_iter.fn = &gaudi2_init_tpc_config;
5373 	tpc_iter.data = &init_cfg_data;
5374 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5375 }
5376 
5377 static void gaudi2_init_rotator(struct hl_device *hdev)
5378 {
5379 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5380 	u32 i, reg_base, queue_id;
5381 
5382 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5383 
5384 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5385 		reg_base = gaudi2_qm_blocks_bases[queue_id];
5386 		gaudi2_init_qman(hdev, reg_base, queue_id);
5387 
5388 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5389 	}
5390 }
5391 
5392 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5393 {
5394 	u32 sob_id;
5395 
5396 	/* VCMD normal interrupt */
5397 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5398 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5399 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5400 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5401 
5402 	/* VCMD abnormal interrupt */
5403 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5404 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5405 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5406 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5407 }
5408 
5409 static void gaudi2_init_dec(struct hl_device *hdev)
5410 {
5411 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5412 	u32 dcore_id, dec_id, dec_bit;
5413 	u64 base_addr;
5414 
5415 	if (!hdev->asic_prop.decoder_enabled_mask)
5416 		return;
5417 
5418 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5419 		return;
5420 
5421 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5422 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5423 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5424 
5425 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5426 				continue;
5427 
5428 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
5429 					BRDG_CTRL_BLOCK_OFFSET +
5430 					dcore_id * DCORE_OFFSET +
5431 					dec_id * DCORE_VDEC_OFFSET;
5432 
5433 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5434 
5435 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5436 		}
5437 
5438 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5439 		dec_bit = PCIE_DEC_SHIFT + dec_id;
5440 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5441 			continue;
5442 
5443 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5444 				dec_id * DCORE_VDEC_OFFSET;
5445 
5446 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5447 
5448 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5449 	}
5450 }
5451 
5452 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5453 					u32 stlb_base, u32 asid, u64 phys_addr)
5454 {
5455 	u32 status, timeout_usec;
5456 	int rc;
5457 
5458 	if (hdev->pldm || !hdev->pdev)
5459 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5460 	else
5461 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5462 
5463 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5464 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5465 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5466 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5467 
5468 	rc = hl_poll_timeout(
5469 		hdev,
5470 		stlb_base + STLB_BUSY_OFFSET,
5471 		status,
5472 		!(status & 0x80000000),
5473 		1000,
5474 		timeout_usec);
5475 
5476 	if (rc) {
5477 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5478 		return rc;
5479 	}
5480 
5481 	return 0;
5482 }
5483 
5484 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5485 					u32 start_offset, u32 inv_start_val,
5486 					u32 flags)
5487 {
5488 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
5489 	if (flags & MMU_OP_CLEAR_MEMCACHE)
5490 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5491 
5492 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5493 		return;
5494 
5495 	WREG32(stlb_base + start_offset, inv_start_val);
5496 }
5497 
5498 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5499 						struct gaudi2_cache_invld_params *inv_params)
5500 {
5501 	u32 status, timeout_usec, start_offset;
5502 	int rc;
5503 
5504 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5505 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5506 
5507 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
5508 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5509 		rc = hl_poll_timeout(
5510 			hdev,
5511 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5512 			status,
5513 			status & 0x1,
5514 			1000,
5515 			timeout_usec);
5516 
5517 		if (rc)
5518 			return rc;
5519 
5520 		/* Need to manually reset the status to 0 */
5521 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5522 	}
5523 
5524 	/* Lower cache does not work with cache lines, hence we can skip its
5525 	 * invalidation upon map and invalidate only upon unmap
5526 	 */
5527 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5528 		return 0;
5529 
5530 	start_offset = inv_params->range_invalidation ?
5531 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5532 
5533 	rc = hl_poll_timeout(
5534 		hdev,
5535 		stlb_base + start_offset,
5536 		status,
5537 		!(status & 0x1),
5538 		1000,
5539 		timeout_usec);
5540 
5541 	return rc;
5542 }
5543 
5544 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5545 {
5546 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5547 	u32 hw_cap;
5548 
5549 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5550 
5551 	if (gaudi2->hw_cap_initialized & hw_cap)
5552 		return true;
5553 
5554 	return false;
5555 }
5556 
5557 /* this function shall be called only for HMMUs for which capability bit is set */
5558 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5559 {
5560 	u32 offset;
5561 
5562 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5563 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5564 }
5565 
5566 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5567 						struct gaudi2_cache_invld_params *inv_params)
5568 {
5569 	u32 start_offset;
5570 
5571 	if (inv_params->range_invalidation) {
5572 		/* Set the addresses range
5573 		 * Note: that the start address we set in register, is not included in
5574 		 * the range of the invalidation, by design.
5575 		 * that's why we need to set lower address than the one we actually
5576 		 * want to be included in the range invalidation.
5577 		 */
5578 		u64 start = inv_params->start_va - 1;
5579 
5580 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5581 
5582 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5583 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5584 
5585 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5586 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5587 
5588 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5589 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5590 
5591 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5592 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5593 	} else {
5594 		start_offset = STLB_INV_ALL_START_OFFSET;
5595 	}
5596 
5597 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5598 						inv_params->inv_start_val, inv_params->flags);
5599 }
5600 
/* Start a cache invalidation on the STLB of the HMMU given by dcore_id/hmmu_id */
static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	gaudi2_mmu_invalidate_cache_trigger(hdev, get_hmmu_stlb_base(dcore_id, hmmu_id),
						inv_params);
}
5609 
/*
 * Wait for the cache invalidation on the STLB of the HMMU given by
 * dcore_id/hmmu_id to end. Returns the result of the common STLB status poll.
 */
static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
						int dcore_id, int hmmu_id,
						struct gaudi2_cache_invld_params *inv_params)
{
	return gaudi2_mmu_invalidate_cache_status_poll(hdev,
						get_hmmu_stlb_base(dcore_id, hmmu_id),
						inv_params);
}
5618 
5619 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5620 						struct gaudi2_cache_invld_params *inv_params)
5621 {
5622 	int dcore_id, hmmu_id;
5623 
5624 	/* first send all invalidation commands */
5625 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5626 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5627 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5628 				continue;
5629 
5630 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5631 		}
5632 	}
5633 
5634 	/* next, poll all invalidations status */
5635 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5636 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5637 			int rc;
5638 
5639 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5640 				continue;
5641 
5642 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5643 										inv_params);
5644 			if (rc)
5645 				return rc;
5646 		}
5647 	}
5648 
5649 	return 0;
5650 }
5651 
5652 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5653 {
5654 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5655 	struct gaudi2_cache_invld_params invld_params;
5656 	int rc = 0;
5657 
5658 	if (hdev->reset_info.hard_reset_pending)
5659 		return rc;
5660 
5661 	invld_params.range_invalidation = false;
5662 	invld_params.inv_start_val = 1;
5663 
5664 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5665 		invld_params.flags = flags;
5666 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5667 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5668 										&invld_params);
5669 	} else if (flags & MMU_OP_PHYS_PACK) {
5670 		invld_params.flags = 0;
5671 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5672 	}
5673 
5674 	return rc;
5675 }
5676 
5677 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5678 				u32 flags, u32 asid, u64 va, u64 size)
5679 {
5680 	struct gaudi2_cache_invld_params invld_params = {0};
5681 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5682 	u64 start_va, end_va;
5683 	u32 inv_start_val;
5684 	int rc = 0;
5685 
5686 	if (hdev->reset_info.hard_reset_pending)
5687 		return 0;
5688 
5689 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5690 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5691 			asid << MMU_RANGE_INV_ASID_SHIFT);
5692 	start_va = va;
5693 	end_va = start_va + size;
5694 
5695 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5696 		/* As range invalidation does not support zero address we will
5697 		 * do full invalidation in this case
5698 		 */
5699 		if (start_va) {
5700 			invld_params.range_invalidation = true;
5701 			invld_params.start_va = start_va;
5702 			invld_params.end_va = end_va;
5703 			invld_params.inv_start_val = inv_start_val;
5704 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5705 		} else {
5706 			invld_params.range_invalidation = false;
5707 			invld_params.inv_start_val = 1;
5708 			invld_params.flags = flags;
5709 		}
5710 
5711 
5712 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5713 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5714 										&invld_params);
5715 		if (rc)
5716 			return rc;
5717 
5718 	} else if (flags & MMU_OP_PHYS_PACK) {
5719 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5720 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5721 		invld_params.inv_start_val = inv_start_val;
5722 		invld_params.flags = flags;
5723 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5724 	}
5725 
5726 	return rc;
5727 }
5728 
5729 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5730 {
5731 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5732 	u64 hop0_addr;
5733 	u32 asid, max_asid = prop->max_asid;
5734 	int rc;
5735 
5736 	/* it takes too much time to init all of the ASIDs on palladium */
5737 	if (hdev->pldm)
5738 		max_asid = min((u32) 8, max_asid);
5739 
5740 	for (asid = 0 ; asid < max_asid ; asid++) {
5741 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5742 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5743 		if (rc) {
5744 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5745 			return rc;
5746 		}
5747 	}
5748 
5749 	return 0;
5750 }
5751 
5752 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5753 {
5754 	u32 status, timeout_usec;
5755 	int rc;
5756 
5757 	if (hdev->pldm || !hdev->pdev)
5758 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5759 	else
5760 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5761 
5762 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5763 
5764 	rc = hl_poll_timeout(
5765 		hdev,
5766 		stlb_base + STLB_SRAM_INIT_OFFSET,
5767 		status,
5768 		!status,
5769 		1000,
5770 		timeout_usec);
5771 
5772 	if (rc)
5773 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5774 
5775 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5776 	if (rc)
5777 		return rc;
5778 
5779 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5780 
5781 	rc = hl_poll_timeout(
5782 		hdev,
5783 		stlb_base + STLB_INV_ALL_START_OFFSET,
5784 		status,
5785 		!status,
5786 		1000,
5787 		timeout_usec);
5788 
5789 	if (rc)
5790 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5791 
5792 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5793 
5794 	return rc;
5795 }
5796 
5797 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5798 {
5799 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5800 	u32 mmu_base, stlb_base;
5801 	int rc;
5802 
5803 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5804 		return 0;
5805 
5806 	mmu_base = mmPMMU_HBW_MMU_BASE;
5807 	stlb_base = mmPMMU_HBW_STLB_BASE;
5808 
5809 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5810 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5811 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5812 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5813 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5814 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5815 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5816 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5817 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5818 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5819 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5820 
5821 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5822 
5823 	if (PAGE_SIZE == SZ_64K) {
5824 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5825 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5826 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5827 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5828 			FIELD_PREP(
5829 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5830 				1),
5831 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5832 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5833 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5834 	}
5835 
5836 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5837 
5838 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5839 	if (rc)
5840 		return rc;
5841 
5842 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5843 
5844 	return 0;
5845 }
5846 
5847 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5848 				int hmmu_id)
5849 {
5850 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5851 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5852 	u32 offset, mmu_base, stlb_base, hw_cap;
5853 	u8 dmmu_seq;
5854 	int rc;
5855 
5856 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5857 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5858 
5859 	/*
5860 	 * return if DMMU is already initialized or if it's not out of
5861 	 * isolation (due to cluster binning)
5862 	 */
5863 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5864 		return 0;
5865 
5866 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5867 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5868 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5869 
5870 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5871 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5872 
5873 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5874 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5875 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5876 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5877 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5878 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5879 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5880 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5881 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5882 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5883 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5884 
5885 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5886 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5887 
5888 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5889 
5890 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5891 	if (rc)
5892 		return rc;
5893 
5894 	gaudi2->hw_cap_initialized |= hw_cap;
5895 
5896 	return 0;
5897 }
5898 
5899 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5900 {
5901 	int rc, dcore_id, hmmu_id;
5902 
5903 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5904 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5905 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5906 			if (rc)
5907 				return rc;
5908 		}
5909 
5910 	return 0;
5911 }
5912 
/**
 * gaudi2_mmu_init - initialize the PMMU and then all of the HMMUs
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * Return: 0 on success, the error of the first stage that failed otherwise.
 */
static int gaudi2_mmu_init(struct hl_device *hdev)
{
	int rc = gaudi2_pci_mmu_init(hdev);

	/* The HMMUs are initialized only if the PMMU init succeeded */
	if (rc)
		return rc;

	return gaudi2_hbm_mmu_init(hdev);
}
5927 
5928 static int gaudi2_hw_init(struct hl_device *hdev)
5929 {
5930 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5931 	int rc;
5932 
5933 	/* Let's mark in the H/W that we have reached this point. We check
5934 	 * this value in the reset_before_init function to understand whether
5935 	 * we need to reset the chip before doing H/W init. This register is
5936 	 * cleared by the H/W upon H/W reset
5937 	 */
5938 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5939 
5940 	/* Perform read from the device to make sure device is up */
5941 	RREG32(mmHW_STATE);
5942 
5943 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5944 	 * So we set it here and if anyone tries to move it later to
5945 	 * a different address, there will be an error
5946 	 */
5947 	if (hdev->asic_prop.iatu_done_by_fw)
5948 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5949 
5950 	/*
5951 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5952 	 * base address of dram
5953 	 */
5954 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5955 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5956 		return -EIO;
5957 	}
5958 
5959 	rc = gaudi2_init_cpu(hdev);
5960 	if (rc) {
5961 		dev_err(hdev->dev, "failed to initialize CPU\n");
5962 		return rc;
5963 	}
5964 
5965 	gaudi2_init_scrambler_hbm(hdev);
5966 	gaudi2_init_kdma(hdev);
5967 
5968 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5969 	if (rc) {
5970 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5971 		return rc;
5972 	}
5973 
5974 	rc = gaudi2->cpucp_info_get(hdev);
5975 	if (rc) {
5976 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5977 		return rc;
5978 	}
5979 
5980 	rc = gaudi2_mmu_init(hdev);
5981 	if (rc)
5982 		return rc;
5983 
5984 	gaudi2_init_pdma(hdev);
5985 	gaudi2_init_edma(hdev);
5986 	gaudi2_init_sm(hdev);
5987 	gaudi2_init_tpc(hdev);
5988 	gaudi2_init_mme(hdev);
5989 	gaudi2_init_rotator(hdev);
5990 	gaudi2_init_dec(hdev);
5991 	gaudi2_enable_timestamp(hdev);
5992 
5993 	rc = gaudi2_coresight_init(hdev);
5994 	if (rc)
5995 		goto disable_queues;
5996 
5997 	rc = gaudi2_enable_msix(hdev);
5998 	if (rc)
5999 		goto disable_queues;
6000 
6001 	/* Perform read from the device to flush all configuration */
6002 	RREG32(mmHW_STATE);
6003 
6004 	return 0;
6005 
6006 disable_queues:
6007 	gaudi2_disable_dma_qmans(hdev);
6008 	gaudi2_disable_mme_qmans(hdev);
6009 	gaudi2_disable_tpc_qmans(hdev);
6010 	gaudi2_disable_rot_qmans(hdev);
6011 	gaudi2_disable_nic_qmans(hdev);
6012 
6013 	gaudi2_disable_timestamp(hdev);
6014 
6015 	return rc;
6016 }
6017 
6018 /**
6019  * gaudi2_send_hard_reset_cmd - common function to handle reset
6020  *
6021  * @hdev: pointer to the habanalabs device structure
6022  *
6023  * This function handles the various possible scenarios for reset.
6024  * It considers if reset is handled by driver\FW and what FW components are loaded
6025  */
6026 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6027 {
6028 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6029 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
6030 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6031 	u32 cpu_boot_status;
6032 
6033 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6034 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6035 
6036 	/*
6037 	 * Handle corner case where failure was at cpu management app load,
6038 	 * and driver didn't detect any failure while loading the FW,
6039 	 * then at such scenario driver will send only HALT_MACHINE
6040 	 * and no one will respond to this request since FW already back to preboot
6041 	 * and it cannot handle such cmd.
6042 	 * In this case next time the management app loads it'll check on events register
6043 	 * which will still have the halt indication, and will reboot the device.
6044 	 * The solution is to let preboot clear all relevant registers before next boot
6045 	 * once driver send COMMS_RST_DEV.
6046 	 */
6047 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6048 
6049 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6050 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6051 		cpu_initialized = true;
6052 
6053 	/*
6054 	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
6055 	 * 1. FW reset: FW initiate the reset sequence
6056 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
6057 	 *                  reset but not the reset itself as it is not implemented
6058 	 *                  on their part) and LKD will wait to let FW complete the
6059 	 *                  sequence before issuing the reset
6060 	 */
6061 	if (!preboot_only && cpu_initialized) {
6062 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6063 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6064 
6065 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6066 	}
6067 
6068 	/*
6069 	 * When working with preboot (without Linux/Boot fit) we can
6070 	 * communicate only using the COMMS commands to issue halt/reset.
6071 	 *
6072 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6073 	 * attempt to revive the card in the small chance that the f/w has
6074 	 * experienced a watchdog event, which caused it to return back to preboot.
6075 	 * In that case, triggering reset through GIC won't help. We need to
6076 	 * trigger the reset as if Linux wasn't loaded.
6077 	 *
6078 	 * We do it only if the reset cause was HB, because that would be the
6079 	 * indication of such an event.
6080 	 *
6081 	 * In case watchdog hasn't expired but we still got HB, then this won't
6082 	 * do any damage.
6083 	 */
6084 
6085 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
6086 		if (hdev->asic_prop.hard_reset_done_by_fw)
6087 			hl_fw_ask_hard_reset_without_linux(hdev);
6088 		else
6089 			hl_fw_ask_halt_machine_without_linux(hdev);
6090 	}
6091 }
6092 
6093 /**
6094  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6095  *
6096  * @hdev: pointer to the habanalabs device structure
6097  *
6098  * This function executes hard reset based on if driver/FW should do the reset
6099  */
6100 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6101 {
6102 	if (hdev->asic_prop.hard_reset_done_by_fw) {
6103 		gaudi2_send_hard_reset_cmd(hdev);
6104 		return;
6105 	}
6106 
6107 	/* Set device to handle FLR by H/W as we will put the device
6108 	 * CPU to halt mode
6109 	 */
6110 	WREG32(mmPCIE_AUX_FLR_CTRL,
6111 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6112 
6113 	gaudi2_send_hard_reset_cmd(hdev);
6114 
6115 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6116 }
6117 
6118 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6119 {
6120 	int i, rc = 0;
6121 	u32 reg_val;
6122 
6123 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6124 		rc = hl_poll_timeout(
6125 			hdev,
6126 			mmCPU_RST_STATUS_TO_HOST,
6127 			reg_val,
6128 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6129 			1000,
6130 			poll_timeout_us);
6131 
6132 	if (rc)
6133 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6134 				reg_val);
6135 	return rc;
6136 }
6137 
6138 /**
6139  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6140  *
6141  * @hdev: pointer to the habanalabs device structure
6142  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6143  * @poll_timeout_us: time to wait for response from f/w.
6144  *
6145  * This function executes soft reset based on if driver/FW should do the reset
6146  */
6147 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6148 						u32 poll_timeout_us)
6149 {
6150 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6151 
6152 	if (!driver_performs_reset) {
6153 		/* set SP to indicate reset request sent to FW */
6154 		if (dyn_regs->cpu_rst_status)
6155 			WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6156 		else
6157 			WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6158 
6159 		WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6160 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6161 
6162 		return gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6163 	}
6164 
6165 	/* Block access to engines, QMANs and SM during reset, these
6166 	 * RRs will be reconfigured after soft reset.
6167 	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6168 	 */
6169 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6170 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6171 
6172 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6173 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6174 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6175 
6176 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6177 	return 0;
6178 }
6179 
6180 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6181 {
6182 	int i, rc = 0;
6183 	u32 reg_val;
6184 
6185 	/* We poll the BTM done indication multiple times after reset due to
6186 	 * a HW errata 'GAUDI2_0300'
6187 	 */
6188 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6189 		rc = hl_poll_timeout(
6190 			hdev,
6191 			mmPSOC_GLOBAL_CONF_BTM_FSM,
6192 			reg_val,
6193 			reg_val == 0,
6194 			1000,
6195 			poll_timeout_us);
6196 
6197 	if (rc)
6198 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6199 }
6200 
6201 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6202 {
6203 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6204 	u32 poll_timeout_us, reset_sleep_ms;
6205 	bool driver_performs_reset = false;
6206 	int rc;
6207 
6208 	if (hdev->pldm) {
6209 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6210 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6211 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6212 	} else {
6213 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6214 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6215 	}
6216 
6217 	if (fw_reset)
6218 		goto skip_reset;
6219 
6220 	gaudi2_reset_arcs(hdev);
6221 
6222 	if (hard_reset) {
6223 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6224 		gaudi2_execute_hard_reset(hdev);
6225 	} else {
6226 		/*
6227 		 * As we have to support also work with preboot only (which does not supports
6228 		 * soft reset) we have to make sure that security is disabled before letting driver
6229 		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
6230 		 * secured device with preboot only.
6231 		 */
6232 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6233 							!hdev->asic_prop.fw_security_enabled);
6234 		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6235 		if (rc)
6236 			return rc;
6237 	}
6238 
6239 skip_reset:
6240 	if (driver_performs_reset || hard_reset) {
6241 		/*
6242 		 * Instead of waiting for BTM indication we should wait for preboot ready:
6243 		 * Consider the below scenario:
6244 		 * 1. FW update is being triggered
6245 		 *        - setting the dirty bit
6246 		 * 2. hard reset will be triggered due to the dirty bit
6247 		 * 3. FW initiates the reset:
6248 		 *        - dirty bit cleared
6249 		 *        - BTM indication cleared
6250 		 *        - preboot ready indication cleared
6251 		 * 4. during hard reset:
6252 		 *        - BTM indication will be set
6253 		 *        - BIST test performed and another reset triggered
6254 		 * 5. only after this reset the preboot will set the preboot ready
6255 		 *
6256 		 * when polling on BTM indication alone we can lose sync with FW while trying to
6257 		 * communicate with FW that is during reset.
6258 		 * to overcome this we will always wait to preboot ready indication
6259 		 */
6260 
6261 		/* without this sleep reset will not work */
6262 		msleep(reset_sleep_ms);
6263 
6264 		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6265 			hl_fw_wait_preboot_ready(hdev);
6266 		else
6267 			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6268 	}
6269 
6270 	if (!gaudi2)
6271 		return 0;
6272 
6273 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6274 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6275 
6276 	/*
6277 	 * Clear NIC capability mask in order for driver to re-configure
6278 	 * NIC QMANs. NIC ports will not be re-configured during soft
6279 	 * reset as we call gaudi2_nic_init only during hard reset
6280 	 */
6281 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6282 
6283 	if (hard_reset) {
6284 		gaudi2->hw_cap_initialized &=
6285 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6286 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6287 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6288 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6289 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6290 
6291 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6292 	} else {
6293 		gaudi2->hw_cap_initialized &=
6294 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6295 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6296 			HW_CAP_ROT_MASK);
6297 	}
6298 	return 0;
6299 }
6300 
6301 static int gaudi2_suspend(struct hl_device *hdev)
6302 {
6303 	int rc;
6304 
6305 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6306 	if (rc)
6307 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6308 
6309 	return rc;
6310 }
6311 
/**
 * gaudi2_resume - re-initialize the iATU upon resume
 *
 * @hdev: pointer to the habanalabs device structure
 *
 * Return: the result of gaudi2_init_iatu().
 */
static int gaudi2_resume(struct hl_device *hdev)
{
	int rc = gaudi2_init_iatu(hdev);

	return rc;
}
6316 
6317 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6318 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
6319 {
6320 	int rc;
6321 
6322 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6323 			VM_DONTCOPY | VM_NORESERVE);
6324 
6325 #ifdef _HAS_DMA_MMAP_COHERENT
6326 
6327 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6328 	if (rc)
6329 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6330 
6331 #else
6332 
6333 	rc = remap_pfn_range(vma, vma->vm_start,
6334 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6335 				size, vma->vm_page_prot);
6336 	if (rc)
6337 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6338 
6339 #endif
6340 
6341 	return rc;
6342 }
6343 
6344 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6345 {
6346 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6347 	u64 hw_cap_mask = 0;
6348 	u64 hw_tpc_cap_bit = 0;
6349 	u64 hw_nic_cap_bit = 0;
6350 	u64 hw_test_cap_bit = 0;
6351 
6352 	switch (hw_queue_id) {
6353 	case GAUDI2_QUEUE_ID_PDMA_0_0:
6354 	case GAUDI2_QUEUE_ID_PDMA_0_1:
6355 	case GAUDI2_QUEUE_ID_PDMA_1_0:
6356 		hw_cap_mask = HW_CAP_PDMA_MASK;
6357 		break;
6358 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6359 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6360 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6361 		break;
6362 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6363 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6364 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6365 		break;
6366 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6367 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6368 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6369 		break;
6370 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6371 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6372 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6373 		break;
6374 
6375 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6376 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
6377 		break;
6378 
6379 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6380 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6381 		break;
6382 
6383 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6384 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6385 		break;
6386 
6387 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6388 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6389 		break;
6390 
6391 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6392 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6393 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6394 
6395 		/* special case where cap bit refers to the first queue id */
6396 		if (!hw_tpc_cap_bit)
6397 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6398 		break;
6399 
6400 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6401 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6402 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6403 		break;
6404 
6405 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6406 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6407 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6408 		break;
6409 
6410 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6411 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6412 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6413 		break;
6414 
6415 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6416 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6417 		break;
6418 
6419 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6420 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6421 		break;
6422 
6423 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6424 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6425 
6426 		/* special case where cap bit refers to the first queue id */
6427 		if (!hw_nic_cap_bit)
6428 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6429 		break;
6430 
6431 	case GAUDI2_QUEUE_ID_CPU_PQ:
6432 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6433 
6434 	default:
6435 		return false;
6436 	}
6437 
6438 	if (hw_tpc_cap_bit)
6439 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6440 
6441 	if (hw_nic_cap_bit)
6442 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6443 
6444 	if (hw_test_cap_bit)
6445 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6446 
6447 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6448 }
6449 
6450 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6451 {
6452 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6453 
6454 	switch (arc_id) {
6455 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6456 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6457 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6458 
6459 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6460 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6461 
6462 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6463 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6464 
6465 	default:
6466 		return false;
6467 	}
6468 }
6469 
6470 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6471 {
6472 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6473 
6474 	switch (arc_id) {
6475 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6476 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6477 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6478 		break;
6479 
6480 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6481 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6482 		break;
6483 
6484 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6485 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6486 		break;
6487 
6488 	default:
6489 		return;
6490 	}
6491 }
6492 
6493 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6494 {
6495 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6496 
6497 	switch (arc_id) {
6498 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6499 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6500 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6501 		break;
6502 
6503 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6504 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6505 		break;
6506 
6507 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6508 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6509 		break;
6510 
6511 	default:
6512 		return;
6513 	}
6514 }
6515 
6516 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6517 {
6518 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6519 	u32 pq_offset, reg_base, db_reg_offset, db_value;
6520 
6521 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6522 		/*
6523 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6524 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6525 		 * number.
6526 		 */
6527 		pq_offset = (hw_queue_id & 0x3) * 4;
6528 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6529 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6530 	} else {
6531 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
6532 	}
6533 
6534 	db_value = pi;
6535 
6536 	/* ring the doorbell */
6537 	WREG32(db_reg_offset, db_value);
6538 
6539 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6540 		/* make sure device CPU will read latest data from host */
6541 		mb();
6542 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6543 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6544 	}
6545 }
6546 
6547 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6548 {
6549 	__le64 *pbd = (__le64 *) bd;
6550 
6551 	/* The QMANs are on the host memory so a simple copy suffice */
6552 	pqe[0] = pbd[0];
6553 	pqe[1] = pbd[1];
6554 }
6555 
6556 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6557 				dma_addr_t *dma_handle, gfp_t flags)
6558 {
6559 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6560 }
6561 
6562 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6563 				void *cpu_addr, dma_addr_t dma_handle)
6564 {
6565 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6566 }
6567 
6568 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6569 				u32 timeout, u64 *result)
6570 {
6571 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6572 
6573 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6574 		if (result)
6575 			*result = 0;
6576 		return 0;
6577 	}
6578 
6579 	if (!timeout)
6580 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6581 
6582 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6583 }
6584 
6585 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6586 				gfp_t mem_flags, dma_addr_t *dma_handle)
6587 {
6588 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6589 		return NULL;
6590 
6591 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6592 }
6593 
6594 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6595 {
6596 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6597 }
6598 
6599 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6600 						dma_addr_t *dma_handle)
6601 {
6602 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6603 }
6604 
6605 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6606 {
6607 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6608 }
6609 
6610 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6611 					enum dma_data_direction dir)
6612 {
6613 	dma_addr_t dma_addr;
6614 
6615 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6616 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6617 		return 0;
6618 
6619 	return dma_addr;
6620 }
6621 
6622 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6623 					enum dma_data_direction dir)
6624 {
6625 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6626 }
6627 
6628 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6629 {
6630 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6631 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6632 
6633 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6634 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6635 		return -EINVAL;
6636 	}
6637 
6638 	/* Just check if CB address is valid */
6639 
6640 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6641 					parser->user_cb_size,
6642 					asic_prop->sram_user_base_address,
6643 					asic_prop->sram_end_address))
6644 		return 0;
6645 
6646 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6647 					parser->user_cb_size,
6648 					asic_prop->dram_user_base_address,
6649 					asic_prop->dram_end_address))
6650 		return 0;
6651 
6652 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6653 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6654 						parser->user_cb_size,
6655 						asic_prop->dmmu.start_addr,
6656 						asic_prop->dmmu.end_addr))
6657 		return 0;
6658 
6659 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6660 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6661 					parser->user_cb_size,
6662 					asic_prop->pmmu.start_addr,
6663 					asic_prop->pmmu.end_addr) ||
6664 			hl_mem_area_inside_range(
6665 					(u64) (uintptr_t) parser->user_cb,
6666 					parser->user_cb_size,
6667 					asic_prop->pmmu_huge.start_addr,
6668 					asic_prop->pmmu_huge.end_addr))
6669 			return 0;
6670 
6671 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6672 		if (!hdev->pdev)
6673 			return 0;
6674 
6675 		if (!device_iommu_mapped(&hdev->pdev->dev))
6676 			return 0;
6677 	}
6678 
6679 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6680 		parser->user_cb, parser->user_cb_size);
6681 
6682 	return -EFAULT;
6683 }
6684 
6685 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6686 {
6687 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6688 
6689 	if (!parser->is_kernel_allocated_cb)
6690 		return gaudi2_validate_cb_address(hdev, parser);
6691 
6692 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6693 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6694 		return -EINVAL;
6695 	}
6696 
6697 	return 0;
6698 }
6699 
6700 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6701 {
6702 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6703 
6704 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6705 		return 0;
6706 
6707 	return hl_fw_send_heartbeat(hdev);
6708 }
6709 
6710 /* This is an internal helper function, used to update the KDMA mmu props.
6711  * Should be called with a proper kdma lock.
6712  */
6713 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6714 					   bool mmu_bypass, u32 asid)
6715 {
6716 	u32 rw_asid, rw_mmu_bp;
6717 
6718 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6719 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6720 
6721 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6722 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6723 
6724 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6725 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6726 }
6727 
6728 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6729 						u32 mon_payload, u32 sync_value)
6730 {
6731 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6732 	u8 mask;
6733 
6734 	sob_offset = sob_id * 4;
6735 	mon_offset = mon_id * 4;
6736 
6737 	/* Reset the SOB value */
6738 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6739 
6740 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
6741 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6742 
6743 	/* Configure this address with CS index because CQ_EN is set */
6744 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6745 
6746 	sync_group_id = sob_id / 8;
6747 	mask = ~(1 << (sob_id & 0x7));
6748 	mode = 1; /* comparison mode is "equal to" */
6749 
6750 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6751 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6752 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6753 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6754 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6755 }
6756 
6757 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
6758 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6759 					u64 src_addr, u64 dst_addr,
6760 					u32 size, bool is_memset)
6761 {
6762 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6763 	struct hl_cq_entry *cq_base;
6764 	struct hl_cq *cq;
6765 	u64 comp_addr;
6766 	int rc;
6767 
6768 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6769 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6770 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6771 
6772 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6773 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6774 
6775 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6776 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6777 
6778 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6779 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6780 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6781 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6782 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6783 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6784 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6785 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6786 
6787 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6788 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6789 
6790 	if (is_memset)
6791 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6792 
6793 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6794 
6795 	/* Wait for completion */
6796 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6797 	cq_base = cq->kernel_address;
6798 	polling_addr = (u32 *)&cq_base[cq->ci];
6799 
6800 	if (hdev->pldm)
6801 		/* for each 1MB 20 second of timeout */
6802 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6803 	else
6804 		timeout = KDMA_TIMEOUT_USEC;
6805 
6806 	/* Polling */
6807 	rc = hl_poll_timeout_memory(
6808 			hdev,
6809 			polling_addr,
6810 			status,
6811 			(status == 1),
6812 			1000,
6813 			timeout,
6814 			true);
6815 
6816 	*polling_addr = 0;
6817 
6818 	if (rc) {
6819 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6820 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6821 		return rc;
6822 	}
6823 
6824 	cq->ci = hl_cq_inc_ptr(cq->ci);
6825 
6826 	return 0;
6827 }
6828 
6829 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6830 {
6831 	u32 i;
6832 
6833 	for (i = 0 ; i < size ; i += sizeof(u32))
6834 		WREG32(addr + i, val);
6835 }
6836 
6837 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6838 {
6839 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6840 
6841 	if (enable) {
6842 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6843 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6844 	} else {
6845 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6846 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6847 	}
6848 }
6849 
6850 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6851 {
6852 	return hdev->asic_prop.first_available_user_sob[0] +
6853 				hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6854 }
6855 
6856 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6857 {
6858 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6859 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6860 
6861 	/* Reset the SOB value */
6862 	WREG32(sob_addr, 0);
6863 }
6864 
6865 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6866 					    struct gaudi2_queues_test_info *msg_info)
6867 {
6868 	u32 sob_offset =  gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6869 	u32 tmp, sob_base = 1;
6870 	struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6871 	size_t pkt_size = sizeof(struct packet_msg_short);
6872 	int rc;
6873 
6874 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6875 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6876 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6877 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6878 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6879 
6880 	msg_short_pkt->value = cpu_to_le32(sob_val);
6881 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6882 
6883 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6884 	if (rc)
6885 		dev_err(hdev->dev,
6886 			"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6887 
6888 	return rc;
6889 }
6890 
6891 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6892 {
6893 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6894 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6895 	u32 timeout_usec, tmp;
6896 	int rc;
6897 
6898 	if (hdev->pldm)
6899 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6900 	else
6901 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6902 
6903 	rc = hl_poll_timeout(
6904 			hdev,
6905 			sob_addr,
6906 			tmp,
6907 			(tmp == sob_val),
6908 			1000,
6909 			timeout_usec);
6910 
6911 	if (rc == -ETIMEDOUT) {
6912 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6913 			hw_queue_id, tmp);
6914 		rc = -EIO;
6915 	}
6916 
6917 	return rc;
6918 }
6919 
6920 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6921 {
6922 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6923 
6924 	/*
6925 	 * check capability here as send_cpu_message() won't update the result
6926 	 * value if no capability
6927 	 */
6928 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6929 		return 0;
6930 
6931 	return hl_fw_test_cpu_queue(hdev);
6932 }
6933 
6934 static int gaudi2_test_queues(struct hl_device *hdev)
6935 {
6936 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6937 	struct gaudi2_queues_test_info *msg_info;
6938 	u32 sob_val = 0x5a5a;
6939 	int i, rc;
6940 
6941 	/* send test message on all enabled Qs */
6942 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6943 		if (!gaudi2_is_queue_enabled(hdev, i))
6944 			continue;
6945 
6946 		msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6947 		gaudi2_qman_set_test_mode(hdev, i, true);
6948 		gaudi2_test_queue_clear(hdev, i);
6949 		rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6950 		if (rc)
6951 			goto done;
6952 	}
6953 
6954 	rc = gaudi2_test_cpu_queue(hdev);
6955 	if (rc)
6956 		goto done;
6957 
6958 	/* verify that all messages were processed */
6959 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6960 		if (!gaudi2_is_queue_enabled(hdev, i))
6961 			continue;
6962 
6963 		rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6964 		if (rc)
6965 			/* chip is not usable, no need for cleanups, just bail-out with error */
6966 			goto done;
6967 
6968 		gaudi2_test_queue_clear(hdev, i);
6969 		gaudi2_qman_set_test_mode(hdev, i, false);
6970 	}
6971 
6972 done:
6973 	return rc;
6974 }
6975 
6976 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6977 {
6978 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6979 	size_t irq_arr_size;
6980 	int rc;
6981 
6982 	gaudi2_init_arcs(hdev);
6983 
6984 	rc = gaudi2_scrub_arcs_dccm(hdev);
6985 	if (rc) {
6986 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
6987 		return rc;
6988 	}
6989 
6990 	gaudi2_init_security(hdev);
6991 
6992 	/* Unmask all IRQs since some could have been received during the soft reset */
6993 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
6994 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
6995 }
6996 
6997 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
6998 		struct engines_data *e)
6999 {
7000 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7001 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7002 	unsigned long *mask = (unsigned long *) mask_arr;
7003 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7004 	bool is_idle = true, is_eng_idle;
7005 	int engine_idx, i, j;
7006 	u64 offset;
7007 
7008 	if (e)
7009 		hl_engine_data_sprintf(e,
7010 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7011 			"----  ----  -------  ------------  -------------  -------------\n");
7012 
7013 	for (i = 0; i < NUM_OF_DCORES; i++) {
7014 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7015 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7016 
7017 			if (!(prop->edma_enabled_mask & BIT(seq)))
7018 				continue;
7019 
7020 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7021 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7022 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7023 
7024 			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7025 			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7026 
7027 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7028 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7029 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7030 
7031 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7032 					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7033 			is_idle &= is_eng_idle;
7034 
7035 			if (mask && !is_eng_idle)
7036 				set_bit(engine_idx, mask);
7037 
7038 			if (e)
7039 				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7040 							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7041 		}
7042 	}
7043 
7044 	return is_idle;
7045 }
7046 
7047 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7048 		struct engines_data *e)
7049 {
7050 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7051 	unsigned long *mask = (unsigned long *) mask_arr;
7052 	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7053 	bool is_idle = true, is_eng_idle;
7054 	int engine_idx, i;
7055 	u64 offset;
7056 
7057 	if (e)
7058 		hl_engine_data_sprintf(e,
7059 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7060 					"----  -------  ------------  -------------  -------------\n");
7061 
7062 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7063 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7064 		offset = i * PDMA_OFFSET;
7065 		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7066 		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7067 
7068 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7069 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7070 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7071 
7072 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7073 				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7074 		is_idle &= is_eng_idle;
7075 
7076 		if (mask && !is_eng_idle)
7077 			set_bit(engine_idx, mask);
7078 
7079 		if (e)
7080 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7081 						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7082 	}
7083 
7084 	return is_idle;
7085 }
7086 
7087 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7088 		struct engines_data *e)
7089 {
7090 	unsigned long *mask = (unsigned long *) mask_arr;
7091 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7092 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7093 	bool is_idle = true, is_eng_idle;
7094 	int engine_idx, i;
7095 	u64 offset = 0;
7096 
7097 	/* NIC, twelve macros in Full chip */
7098 	if (e && hdev->nic_ports_mask)
7099 		hl_engine_data_sprintf(e,
7100 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7101 					"---  -------  ------------  ----------\n");
7102 
7103 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7104 		if (!(i & 1))
7105 			offset = i / 2 * NIC_OFFSET;
7106 		else
7107 			offset += NIC_QM_OFFSET;
7108 
7109 		if (!(hdev->nic_ports_mask & BIT(i)))
7110 			continue;
7111 
7112 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7113 
7114 
7115 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7116 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7117 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7118 
7119 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7120 		is_idle &= is_eng_idle;
7121 
7122 		if (mask && !is_eng_idle)
7123 			set_bit(engine_idx, mask);
7124 
7125 		if (e)
7126 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7127 						qm_glbl_sts0, qm_cgm_sts);
7128 	}
7129 
7130 	return is_idle;
7131 }
7132 
7133 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7134 		struct engines_data *e)
7135 {
7136 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7137 	unsigned long *mask = (unsigned long *) mask_arr;
7138 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7139 	bool is_idle = true, is_eng_idle;
7140 	int engine_idx, i;
7141 	u64 offset;
7142 
7143 	if (e)
7144 		hl_engine_data_sprintf(e,
7145 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7146 					"---  ----  -------  ------------  ---------------\n");
7147 	/* MME, one per Dcore */
7148 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7149 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7150 		offset = i * DCORE_OFFSET;
7151 
7152 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7153 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7154 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7155 
7156 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7157 		is_idle &= is_eng_idle;
7158 
7159 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7160 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7161 		is_idle &= is_eng_idle;
7162 
7163 		if (e)
7164 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
7165 				is_eng_idle ? "Y" : "N",
7166 				qm_glbl_sts0,
7167 				mme_arch_sts);
7168 
7169 		if (mask && !is_eng_idle)
7170 			set_bit(engine_idx, mask);
7171 	}
7172 
7173 	return is_idle;
7174 }
7175 
7176 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7177 					struct iterate_module_ctx *ctx)
7178 {
7179 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7180 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7181 	bool is_eng_idle;
7182 	int engine_idx;
7183 
7184 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7185 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7186 	else
7187 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7188 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7189 
7190 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7191 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7192 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7193 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7194 
7195 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7196 						IS_TPC_IDLE(tpc_cfg_sts);
7197 	*(idle_data->is_idle) &= is_eng_idle;
7198 
7199 	if (idle_data->mask && !is_eng_idle)
7200 		set_bit(engine_idx, idle_data->mask);
7201 
7202 	if (idle_data->e)
7203 		hl_engine_data_sprintf(idle_data->e,
7204 					idle_data->tpc_fmt, dcore, inst,
7205 					is_eng_idle ? "Y" : "N",
7206 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7207 }
7208 
7209 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7210 		struct engines_data *e)
7211 {
7212 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7213 	unsigned long *mask = (unsigned long *) mask_arr;
7214 	bool is_idle = true;
7215 
7216 	struct gaudi2_tpc_idle_data tpc_idle_data = {
7217 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7218 		.e = e,
7219 		.mask = mask,
7220 		.is_idle = &is_idle,
7221 	};
7222 	struct iterate_module_ctx tpc_iter = {
7223 		.fn = &gaudi2_is_tpc_engine_idle,
7224 		.data = &tpc_idle_data,
7225 	};
7226 
7227 	if (e && prop->tpc_enabled_mask)
7228 		hl_engine_data_sprintf(e,
7229 			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7230 			"----  ---  -------  ------------  ----------  ------\n");
7231 
7232 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7233 
7234 	return tpc_idle_data.is_idle;
7235 }
7236 
7237 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7238 		struct engines_data *e)
7239 {
7240 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7241 	unsigned long *mask = (unsigned long *) mask_arr;
7242 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7243 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7244 	bool is_idle = true, is_eng_idle;
7245 	u32 dec_swreg15, dec_enabled_bit;
7246 	int engine_idx, i, j;
7247 	u64 offset;
7248 
7249 	/* Decoders, two each Dcore and two shared PCIe decoders */
7250 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7251 		hl_engine_data_sprintf(e,
7252 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7253 			"----  ---  -------  ---------------\n");
7254 
7255 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7256 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7257 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7258 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7259 				continue;
7260 
7261 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7262 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7263 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7264 
7265 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7266 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7267 			is_idle &= is_eng_idle;
7268 
7269 			if (mask && !is_eng_idle)
7270 				set_bit(engine_idx, mask);
7271 
7272 			if (e)
7273 				hl_engine_data_sprintf(e, dec_fmt, i, j,
7274 							is_eng_idle ? "Y" : "N", dec_swreg15);
7275 		}
7276 	}
7277 
7278 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7279 		hl_engine_data_sprintf(e,
7280 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7281 			"--------  -------  ---------------\n");
7282 
7283 	/* Check shared(PCIe) decoders */
7284 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7285 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
7286 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7287 			continue;
7288 
7289 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7290 		offset = i * DCORE_DEC_OFFSET;
7291 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7292 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7293 		is_idle &= is_eng_idle;
7294 
7295 		if (mask && !is_eng_idle)
7296 			set_bit(engine_idx, mask);
7297 
7298 		if (e)
7299 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7300 						is_eng_idle ? "Y" : "N", dec_swreg15);
7301 	}
7302 
7303 	return is_idle;
7304 }
7305 
7306 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7307 		struct engines_data *e)
7308 {
7309 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7310 	unsigned long *mask = (unsigned long *) mask_arr;
7311 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7312 	bool is_idle = true, is_eng_idle;
7313 	int engine_idx, i;
7314 	u64 offset;
7315 
7316 	if (e)
7317 		hl_engine_data_sprintf(e,
7318 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7319 			"----  ---  -------  ------------  ------------  ----------\n");
7320 
7321 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7322 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7323 
7324 		offset = i * ROT_OFFSET;
7325 
7326 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7327 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7328 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7329 
7330 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7331 		is_idle &= is_eng_idle;
7332 
7333 		if (mask && !is_eng_idle)
7334 			set_bit(engine_idx, mask);
7335 
7336 		if (e)
7337 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7338 						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7339 	}
7340 
7341 	return is_idle;
7342 }
7343 
7344 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7345 					struct engines_data *e)
7346 {
7347 	bool is_idle = true;
7348 
7349 	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7350 	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7351 	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7352 	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7353 	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7354 	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7355 	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7356 
7357 	return is_idle;
7358 }
7359 
7360 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7361 	__acquires(&gaudi2->hw_queues_lock)
7362 {
7363 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7364 
7365 	spin_lock(&gaudi2->hw_queues_lock);
7366 }
7367 
7368 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7369 	__releases(&gaudi2->hw_queues_lock)
7370 {
7371 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7372 
7373 	spin_unlock(&gaudi2->hw_queues_lock);
7374 }
7375 
7376 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7377 {
7378 	return hdev->pdev->device;
7379 }
7380 
7381 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7382 {
7383 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7384 
7385 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7386 		return 0;
7387 
7388 	return hl_fw_get_eeprom_data(hdev, data, max_size);
7389 }
7390 
7391 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7392 {
7393 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7394 }
7395 
7396 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7397 {
7398 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7399 
7400 	if (aggregate) {
7401 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
7402 		return gaudi2->events_stat_aggregate;
7403 	}
7404 
7405 	*size = (u32) sizeof(gaudi2->events_stat);
7406 	return gaudi2->events_stat;
7407 }
7408 
7409 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7410 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7411 {
7412 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7413 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
7414 
7415 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7416 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7417 
7418 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7419 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7420 
7421 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7422 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7423 
7424 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7425 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7426 
7427 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7428 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7429 }
7430 
7431 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7432 {
7433 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7434 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7435 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7436 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
7437 	u32 vdec_id, i, ports_offset, reg_val;
7438 	u8 edma_seq_base;
7439 
7440 	/* EDMA */
7441 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7442 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7443 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7444 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7445 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7446 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7447 	}
7448 
7449 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7450 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7451 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7452 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7453 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7454 	}
7455 
7456 	/* Sync Mngr */
7457 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7458 	/*
7459 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7460 	 * for any access type
7461 	 */
7462 	if (dcore_id > 0) {
7463 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7464 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7465 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7466 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7467 	}
7468 
7469 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7470 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7471 
7472 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7473 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
7474 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7475 				dcore_offset + ports_offset, 0);
7476 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7477 				dcore_offset + ports_offset, rw_asid);
7478 	}
7479 
7480 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7481 		ports_offset = i * DCORE_MME_WB_OFFSET;
7482 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7483 				dcore_offset + ports_offset, 0);
7484 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7485 				dcore_offset + ports_offset, rw_asid);
7486 	}
7487 
7488 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7489 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7490 
7491 	/*
7492 	 * Decoders
7493 	 */
7494 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7495 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7496 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7497 	}
7498 }
7499 
7500 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7501 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7502 {
7503 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7504 
7505 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7506 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7507 
7508 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7509 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7510 
7511 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7512 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7513 
7514 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7515 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7516 
7517 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7518 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7519 }
7520 
7521 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7522 							u32 rw_asid, u32 rw_mmu_bp)
7523 {
7524 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7525 
7526 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7527 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7528 }
7529 
7530 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7531 {
7532 	u32 reg_base, reg_offset, reg_val = 0;
7533 
7534 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
7535 
7536 	/* Enable MMU and configure asid for all relevant ARC regions */
7537 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7538 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7539 
7540 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7541 	WREG32(reg_base + reg_offset, reg_val);
7542 
7543 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7544 	WREG32(reg_base + reg_offset, reg_val);
7545 
7546 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7547 	WREG32(reg_base + reg_offset, reg_val);
7548 
7549 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7550 	WREG32(reg_base + reg_offset, reg_val);
7551 
7552 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7553 	WREG32(reg_base + reg_offset, reg_val);
7554 
7555 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7556 	WREG32(reg_base + reg_offset, reg_val);
7557 
7558 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7559 	WREG32(reg_base + reg_offset, reg_val);
7560 
7561 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7562 	WREG32(reg_base + reg_offset, reg_val);
7563 
7564 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7565 	WREG32(reg_base + reg_offset, reg_val);
7566 
7567 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7568 	WREG32(reg_base + reg_offset, reg_val);
7569 
7570 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7571 	WREG32(reg_base + reg_offset, reg_val);
7572 }
7573 
7574 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7575 {
7576 	int i;
7577 
7578 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7579 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7580 
7581 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7582 		gaudi2_arc_mmu_prepare(hdev, i, asid);
7583 
7584 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7585 		if (!gaudi2_is_queue_enabled(hdev, i))
7586 			continue;
7587 
7588 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7589 	}
7590 
7591 	return 0;
7592 }
7593 
7594 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7595 {
7596 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7597 	u32 rw_asid, offset;
7598 	int rc, i;
7599 
7600 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7601 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7602 
7603 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7604 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7605 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7606 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7607 
7608 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7609 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7610 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7611 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7612 
7613 	/* ROT */
7614 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7615 		offset = i * ROT_OFFSET;
7616 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7617 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7618 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7619 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7620 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7621 	}
7622 
7623 	/* Shared Decoders are the last bits in the decoders mask */
7624 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7625 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7626 
7627 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7628 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7629 
7630 	/* arc farm arc dup eng */
7631 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7632 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7633 
7634 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7635 	if (rc)
7636 		return rc;
7637 
7638 	return 0;
7639 }
7640 
7641 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
7642 					struct iterate_module_ctx *ctx)
7643 {
7644 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7645 
7646 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7647 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7648 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7649 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7650 }
7651 
7652 /* zero the MMUBP and set the ASID */
7653 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7654 {
7655 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7656 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
7657 	struct iterate_module_ctx tpc_iter = {
7658 		.fn = &gaudi2_tpc_mmu_prepare,
7659 		.data = &tpc_mmu_data,
7660 	};
7661 	int rc, i;
7662 
7663 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7664 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
7665 		return -EINVAL;
7666 	}
7667 
7668 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7669 		return 0;
7670 
7671 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
7672 	if (rc)
7673 		return rc;
7674 
7675 	/* configure DCORE MMUs */
7676 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7677 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7678 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7679 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
7680 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
7681 
7682 	return 0;
7683 }
7684 
7685 static inline bool is_info_event(u32 event)
7686 {
7687 	switch (event) {
7688 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7689 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7690 
7691 	/* return in case of NIC status event - these events are received periodically and not as
7692 	 * an indication to an error.
7693 	 */
7694 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7695 		return true;
7696 	default:
7697 		return false;
7698 	}
7699 }
7700 
7701 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7702 			bool ratelimited, const char *fmt, ...)
7703 {
7704 	struct va_format vaf;
7705 	va_list args;
7706 
7707 	va_start(args, fmt);
7708 	vaf.fmt = fmt;
7709 	vaf.va = &args;
7710 
7711 	if (ratelimited)
7712 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7713 			gaudi2_irq_map_table[event_type].valid ?
7714 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7715 	else
7716 		dev_err(hdev->dev, "%s: %pV\n",
7717 			gaudi2_irq_map_table[event_type].valid ?
7718 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7719 
7720 	va_end(args);
7721 }
7722 
7723 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7724 		struct hl_eq_ecc_data *ecc_data)
7725 {
7726 	u64 ecc_address = 0, ecc_syndrom = 0;
7727 	u8 memory_wrapper_idx = 0;
7728 
7729 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7730 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7731 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7732 
7733 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7734 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7735 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7736 
7737 	return !!ecc_data->is_critical;
7738 }
7739 
/*
 * gaudi2_queue_idx_dec - step a queue index (pi/ci) one entry back, with wrap-around
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
{
	const u32 wrap_mask = q_len - 1;
	u32 stepped_back;

	/*
	 * Going one entry back modulo the queue length is the same as going
	 * (queue length - 1) entries forward. Keeping only the LSBs afterwards
	 * brings the value back into the range [0, q_len - 1].
	 */
	stepped_back = idx + wrap_mask;

	return stepped_back & wrap_mask;
}
7759 
7760 /**
7761  * gaudi2_print_sw_config_stream_data - print SW config stream data
7762  *
7763  * @hdev: pointer to the habanalabs device structure
7764  * @stream: the QMAN's stream
7765  * @qman_base: base address of QMAN registers block
7766  */
7767 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7768 						u32 stream, u64 qman_base)
7769 {
7770 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7771 	u32 cq_ptr_lo_off, size;
7772 
7773 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7774 
7775 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7776 									stream * cq_ptr_lo_off;
7777 
7778 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7779 
7780 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7781 
7782 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7783 	size = RREG32(cq_tsize);
7784 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7785 		stream, cq_ptr, size);
7786 }
7787 
7788 /**
7789  * gaudi2_print_last_pqes_on_err - print last PQEs on error
7790  *
7791  * @hdev: pointer to the habanalabs device structure
7792  * @qid_base: first QID of the QMAN (out of 4 streams)
7793  * @stream: the QMAN's stream
7794  * @qman_base: base address of QMAN registers block
7795  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7796  */
7797 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7798 						u64 qman_base, bool pr_sw_conf)
7799 {
7800 	u32 ci, qm_ci_stream_off;
7801 	struct hl_hw_queue *q;
7802 	u64 pq_ci;
7803 	int i;
7804 
7805 	q = &hdev->kernel_queues[qid_base + stream];
7806 
7807 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7808 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7809 						stream * qm_ci_stream_off;
7810 
7811 	hdev->asic_funcs->hw_queues_lock(hdev);
7812 
7813 	if (pr_sw_conf)
7814 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7815 
7816 	ci = RREG32(pq_ci);
7817 
7818 	/* we should start printing form ci -1 */
7819 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7820 
7821 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7822 		struct hl_bd *bd;
7823 		u64 addr;
7824 		u32 len;
7825 
7826 		bd = q->kernel_address;
7827 		bd += ci;
7828 
7829 		len = le32_to_cpu(bd->len);
7830 		/* len 0 means uninitialized entry- break */
7831 		if (!len)
7832 			break;
7833 
7834 		addr = le64_to_cpu(bd->ptr);
7835 
7836 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7837 			stream, ci, addr, len);
7838 
7839 		/* get previous ci, wrap if needed */
7840 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7841 	}
7842 
7843 	hdev->asic_funcs->hw_queues_unlock(hdev);
7844 }
7845 
7846 /**
7847  * print_qman_data_on_err - extract QMAN data on error
7848  *
7849  * @hdev: pointer to the habanalabs device structure
7850  * @qid_base: first QID of the QMAN (out of 4 streams)
7851  * @stream: the QMAN's stream
7852  * @qman_base: base address of QMAN registers block
7853  *
7854  * This function attempt to extract as much data as possible on QMAN error.
7855  * On upper CP print the SW config stream data and last 8 PQEs.
7856  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7857  */
7858 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7859 {
7860 	u32 i;
7861 
7862 	if (stream != QMAN_STREAMS) {
7863 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7864 		return;
7865 	}
7866 
7867 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7868 
7869 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7870 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7871 }
7872 
7873 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7874 							u64 qman_base, u32 qid_base)
7875 {
7876 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7877 	u64 glbl_sts_addr, arb_err_addr;
7878 	char reg_desc[32];
7879 
7880 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7881 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7882 
7883 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7884 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7885 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7886 
7887 		if (!glbl_sts_val)
7888 			continue;
7889 
7890 		if (i == QMAN_STREAMS) {
7891 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7892 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7893 		} else {
7894 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7895 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7896 		}
7897 
7898 		for (j = 0 ; j < num_error_causes ; j++)
7899 			if (glbl_sts_val & BIT(j)) {
7900 				gaudi2_print_event(hdev, event_type, true,
7901 					"%s. err cause: %s", reg_desc,
7902 					i == QMAN_STREAMS ?
7903 					gaudi2_qman_lower_cp_error_cause[j] :
7904 					gaudi2_qman_error_cause[j]);
7905 				error_count++;
7906 			}
7907 
7908 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7909 	}
7910 
7911 	arb_err_val = RREG32(arb_err_addr);
7912 
7913 	if (!arb_err_val)
7914 		goto out;
7915 
7916 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7917 		if (arb_err_val & BIT(j)) {
7918 			gaudi2_print_event(hdev, event_type, true,
7919 				"ARB_ERR. err cause: %s",
7920 				gaudi2_qman_arb_error_cause[j]);
7921 			error_count++;
7922 		}
7923 	}
7924 
7925 out:
7926 	return error_count;
7927 }
7928 
7929 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7930 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7931 			enum gaudi2_engine_id id, u64 *event_mask)
7932 {
7933 	u32 razwi_hi, razwi_lo, razwi_xy;
7934 	u16 eng_id = id;
7935 	u8 rd_wr_flag;
7936 
7937 	if (is_write) {
7938 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7939 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7940 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7941 		rd_wr_flag = HL_RAZWI_WRITE;
7942 	} else {
7943 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7944 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7945 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7946 		rd_wr_flag = HL_RAZWI_READ;
7947 	}
7948 
7949 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7950 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7951 
7952 	dev_err_ratelimited(hdev->dev,
7953 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7954 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7955 }
7956 
7957 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7958 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7959 			enum gaudi2_engine_id id, u64 *event_mask)
7960 {
7961 	u64 razwi_addr = CFG_BASE;
7962 	u32 razwi_xy;
7963 	u16 eng_id = id;
7964 	u8 rd_wr_flag;
7965 
7966 	if (is_write) {
7967 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7968 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7969 		rd_wr_flag = HL_RAZWI_WRITE;
7970 	} else {
7971 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7972 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7973 		rd_wr_flag = HL_RAZWI_READ;
7974 	}
7975 
7976 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7977 	dev_err_ratelimited(hdev->dev,
7978 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7979 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7980 						razwi_xy);
7981 }
7982 
7983 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7984 						enum razwi_event_sources module, u8 module_idx)
7985 {
7986 	switch (module) {
7987 	case RAZWI_TPC:
7988 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7989 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7990 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7991 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7992 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
7993 
7994 	case RAZWI_MME:
7995 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
7996 			(module_idx * ENGINE_ID_DCORE_OFFSET));
7997 
7998 	case RAZWI_EDMA:
7999 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8000 			(module_idx % NUM_OF_EDMA_PER_DCORE));
8001 
8002 	case RAZWI_PDMA:
8003 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8004 
8005 	case RAZWI_NIC:
8006 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8007 
8008 	case RAZWI_DEC:
8009 		if (module_idx == 8)
8010 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8011 
8012 		if (module_idx == 9)
8013 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8014 					;
8015 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8016 				(module_idx % NUM_OF_DEC_PER_DCORE) +
8017 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8018 
8019 	case RAZWI_ROT:
8020 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8021 
8022 	default:
8023 		return GAUDI2_ENGINE_ID_SIZE;
8024 	}
8025 }
8026 
8027 /*
8028  * This function handles RR(Range register) hit events.
8029  * raised be initiators not PSOC RAZWI.
8030  */
8031 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8032 				enum razwi_event_sources module, u8 module_idx,
8033 				u8 module_sub_idx, u64 *event_mask)
8034 {
8035 	bool via_sft = false;
8036 	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
8037 	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8038 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8039 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8040 	char initiator_name[64];
8041 
8042 	switch (module) {
8043 	case RAZWI_TPC:
8044 		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8045 
8046 		if (hl_is_fw_ver_below_1_9(hdev) &&
8047 				!hdev->asic_prop.fw_security_enabled &&
8048 				((module_idx == 0) || (module_idx == 1)))
8049 			lbw_rtr_id = DCORE0_RTR0;
8050 		else
8051 			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8052 		sprintf(initiator_name, "TPC_%u", module_idx);
8053 		break;
8054 	case RAZWI_MME:
8055 		sprintf(initiator_name, "MME_%u", module_idx);
8056 		switch (module_sub_idx) {
8057 		case MME_WAP0:
8058 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8059 			break;
8060 		case MME_WAP1:
8061 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8062 			break;
8063 		case MME_WRITE:
8064 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8065 			break;
8066 		case MME_READ:
8067 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8068 			break;
8069 		case MME_SBTE0:
8070 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8071 			break;
8072 		case MME_SBTE1:
8073 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8074 			break;
8075 		case MME_SBTE2:
8076 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8077 			break;
8078 		case MME_SBTE3:
8079 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8080 			break;
8081 		case MME_SBTE4:
8082 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8083 			break;
8084 		default:
8085 			return;
8086 		}
8087 		lbw_rtr_id = hbw_rtr_id;
8088 		break;
8089 	case RAZWI_EDMA:
8090 		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8091 		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8092 		/* SFT has separate MSTR_IF for LBW, only there we can
8093 		 * read the LBW razwi related registers
8094 		 */
8095 		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8096 								dcore_id * SFT_DCORE_OFFSET;
8097 		via_sft = true;
8098 		sprintf(initiator_name, "EDMA_%u", module_idx);
8099 		break;
8100 	case RAZWI_PDMA:
8101 		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8102 		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8103 		sprintf(initiator_name, "PDMA_%u", module_idx);
8104 		break;
8105 	case RAZWI_NIC:
8106 		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8107 		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8108 		sprintf(initiator_name, "NIC_%u", module_idx);
8109 		break;
8110 	case RAZWI_DEC:
8111 		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8112 		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8113 		sprintf(initiator_name, "DEC_%u", module_idx);
8114 		break;
8115 	case RAZWI_ROT:
8116 		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8117 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8118 		sprintf(initiator_name, "ROT_%u", module_idx);
8119 		break;
8120 	default:
8121 		return;
8122 	}
8123 
8124 	/* Find router mstr_if register base */
8125 	if (!via_sft) {
8126 		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8127 		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8128 		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8129 				dcore_id * DCORE_OFFSET +
8130 				dcore_rtr_id * DCORE_RTR_OFFSET +
8131 				RTR_MSTR_IF_OFFSET;
8132 		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8133 				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8134 	}
8135 
8136 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
8137 	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8138 	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8139 	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8140 	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8141 
8142 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8143 	if (hbw_shrd_aw) {
8144 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8145 						initiator_name, eng_id, event_mask);
8146 
8147 		/* Clear event indication */
8148 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8149 	}
8150 
8151 	if (hbw_shrd_ar) {
8152 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8153 						initiator_name, eng_id, event_mask);
8154 
8155 		/* Clear event indication */
8156 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8157 	}
8158 
8159 	if (lbw_shrd_aw) {
8160 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8161 						initiator_name, eng_id, event_mask);
8162 
8163 		/* Clear event indication */
8164 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8165 	}
8166 
8167 	if (lbw_shrd_ar) {
8168 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8169 						initiator_name, eng_id, event_mask);
8170 
8171 		/* Clear event indication */
8172 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8173 	}
8174 }
8175 
8176 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8177 {
8178 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8179 	u8 mod_idx, sub_mod;
8180 
8181 	/* check all TPCs */
8182 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8183 		if (prop->tpc_enabled_mask & BIT(mod_idx))
8184 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8185 	}
8186 
8187 	/* check all MMEs */
8188 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8189 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8190 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8191 									sub_mod, NULL);
8192 
8193 	/* check all EDMAs */
8194 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8195 		if (prop->edma_enabled_mask & BIT(mod_idx))
8196 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8197 
8198 	/* check all PDMAs */
8199 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8200 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8201 
8202 	/* check all NICs */
8203 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8204 		if (hdev->nic_ports_mask & BIT(mod_idx))
8205 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8206 								NULL);
8207 
8208 	/* check all DECs */
8209 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8210 		if (prop->decoder_enabled_mask & BIT(mod_idx))
8211 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8212 
8213 	/* check all ROTs */
8214 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8215 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8216 }
8217 
8218 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8219 						u32 axuser_xy, u32 *base, u16 *eng_id,
8220 						char *eng_name)
8221 {
8222 
8223 	int i, num_of_eng = 0;
8224 	u16 str_size = 0;
8225 
8226 	for (i = 0 ; i < array_size ; i++) {
8227 		if (axuser_xy != razwi_info[i].axuser_xy)
8228 			continue;
8229 
8230 		eng_id[num_of_eng] = razwi_info[i].eng_id;
8231 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
8232 		if (!num_of_eng)
8233 			str_size += snprintf(eng_name + str_size,
8234 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8235 						razwi_info[i].eng_name);
8236 		else
8237 			str_size += snprintf(eng_name + str_size,
8238 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8239 						razwi_info[i].eng_name);
8240 		num_of_eng++;
8241 	}
8242 
8243 	return num_of_eng;
8244 }
8245 
8246 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8247 						u64 *event_mask)
8248 {
8249 	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8250 	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8251 	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8252 	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8253 	bool razwi_happened = false;
8254 	int i;
8255 
8256 	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8257 							axuser_xy, base, eng_id, eng_name_str);
8258 
8259 	/* If no match for XY coordinates, try to find it in MME razwi table */
8260 	if (!num_of_eng) {
8261 		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8262 		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8263 								ARRAY_SIZE(mme_razwi_info),
8264 								axuser_xy, base, eng_id,
8265 								eng_name_str);
8266 	}
8267 
8268 	for  (i = 0 ; i < num_of_eng ; i++) {
8269 		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8270 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8271 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8272 			dev_err(hdev->dev,
8273 					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8274 					eng_name_str, ((u64)addr_hi << 32) + addr_lo);
8275 			hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0],
8276 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8277 			razwi_happened = true;
8278 		}
8279 
8280 		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8281 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8282 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8283 			dev_err(hdev->dev,
8284 					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8285 					eng_name_str, ((u64)addr_hi << 32) + addr_lo);
8286 			hl_handle_razwi(hdev, ((u64)addr_hi << 32) + addr_lo, &eng_id[0],
8287 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8288 			razwi_happened = true;
8289 		}
8290 
8291 		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8292 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8293 			dev_err(hdev->dev,
8294 					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8295 					eng_name_str, addr_lo);
8296 			hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8297 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8298 			razwi_happened = true;
8299 		}
8300 
8301 		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8302 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8303 			dev_err(hdev->dev,
8304 					"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8305 					eng_name_str, addr_lo);
8306 			hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8307 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8308 			razwi_happened = true;
8309 		}
8310 		/* In common case the loop will break, when there is only one engine id, or
8311 		 * several engines with the same router. The exceptional case is with psoc razwi
8312 		 * from EDMA, where it's possible to get axuser id which fits 2 routers (2
8313 		 * interfaces of sft router). In this case, maybe the first router won't hold info
8314 		 * and we will need to iterate on the other router.
8315 		 */
8316 		if (razwi_happened)
8317 			break;
8318 	}
8319 
8320 	return razwi_happened;
8321 }
8322 
8323 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8324 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8325 {
8326 	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8327 
8328 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8329 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8330 		if (!razwi_intr)
8331 			return 0;
8332 	}
8333 
8334 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8335 
8336 	dev_err_ratelimited(hdev->dev,
8337 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8338 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8339 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8340 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8341 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8342 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8343 
8344 	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8345 		error_count++;
8346 	else
8347 		dev_err_ratelimited(hdev->dev,
8348 				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8349 				razwi_mask_info);
8350 
8351 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8352 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8353 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8354 
8355 	return error_count;
8356 }
8357 
8358 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8359 {
8360 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8361 
8362 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8363 
8364 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8365 		if (sts_val & BIT(i)) {
8366 			gaudi2_print_event(hdev, event_type, true,
8367 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
8368 			sts_clr_val |= BIT(i);
8369 			error_count++;
8370 		}
8371 	}
8372 
8373 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8374 
8375 	return error_count;
8376 }
8377 
8378 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8379 					bool extended_err_check, u64 *event_mask)
8380 {
8381 	enum razwi_event_sources module;
8382 	u32 error_count = 0;
8383 	u64 qman_base;
8384 	u8 index;
8385 
8386 	switch (event_type) {
8387 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8388 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8389 		qman_base = mmDCORE0_TPC0_QM_BASE +
8390 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8391 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8392 		module = RAZWI_TPC;
8393 		break;
8394 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8395 		qman_base = mmDCORE0_TPC6_QM_BASE;
8396 		module = RAZWI_TPC;
8397 		break;
8398 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8399 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8400 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8401 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8402 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8403 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8404 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8405 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8406 		module = RAZWI_MME;
8407 		break;
8408 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8409 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8410 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8411 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8412 		module = RAZWI_PDMA;
8413 		break;
8414 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8415 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8416 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8417 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8418 		module = RAZWI_ROT;
8419 		break;
8420 	default:
8421 		return 0;
8422 	}
8423 
8424 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8425 
8426 	/* There is a single event per NIC macro, so should check its both QMAN blocks */
8427 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8428 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8429 		error_count += _gaudi2_handle_qm_sei_err(hdev,
8430 					qman_base + NIC_QM_OFFSET, event_type);
8431 
8432 	if (extended_err_check) {
8433 		/* check if RAZWI happened */
8434 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8435 		hl_check_for_glbl_errors(hdev);
8436 	}
8437 
8438 	return error_count;
8439 }
8440 
8441 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8442 {
8443 	u32 qid_base, error_count = 0;
8444 	u64 qman_base;
8445 	u8 index = 0;
8446 
8447 	switch (event_type) {
8448 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8449 		index = event_type - GAUDI2_EVENT_TPC0_QM;
8450 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8451 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8452 		break;
8453 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8454 		index = event_type - GAUDI2_EVENT_TPC6_QM;
8455 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8456 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8457 		break;
8458 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8459 		index = event_type - GAUDI2_EVENT_TPC12_QM;
8460 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8461 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8462 		break;
8463 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8464 		index = event_type - GAUDI2_EVENT_TPC18_QM;
8465 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8466 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8467 		break;
8468 	case GAUDI2_EVENT_TPC24_QM:
8469 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8470 		qman_base = mmDCORE0_TPC6_QM_BASE;
8471 		break;
8472 	case GAUDI2_EVENT_MME0_QM:
8473 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8474 		qman_base = mmDCORE0_MME_QM_BASE;
8475 		break;
8476 	case GAUDI2_EVENT_MME1_QM:
8477 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8478 		qman_base = mmDCORE1_MME_QM_BASE;
8479 		break;
8480 	case GAUDI2_EVENT_MME2_QM:
8481 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8482 		qman_base = mmDCORE2_MME_QM_BASE;
8483 		break;
8484 	case GAUDI2_EVENT_MME3_QM:
8485 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8486 		qman_base = mmDCORE3_MME_QM_BASE;
8487 		break;
8488 	case GAUDI2_EVENT_HDMA0_QM:
8489 		index = 0;
8490 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8491 		qman_base = mmDCORE0_EDMA0_QM_BASE;
8492 		break;
8493 	case GAUDI2_EVENT_HDMA1_QM:
8494 		index = 1;
8495 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8496 		qman_base = mmDCORE0_EDMA1_QM_BASE;
8497 		break;
8498 	case GAUDI2_EVENT_HDMA2_QM:
8499 		index = 2;
8500 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8501 		qman_base = mmDCORE1_EDMA0_QM_BASE;
8502 		break;
8503 	case GAUDI2_EVENT_HDMA3_QM:
8504 		index = 3;
8505 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8506 		qman_base = mmDCORE1_EDMA1_QM_BASE;
8507 		break;
8508 	case GAUDI2_EVENT_HDMA4_QM:
8509 		index = 4;
8510 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8511 		qman_base = mmDCORE2_EDMA0_QM_BASE;
8512 		break;
8513 	case GAUDI2_EVENT_HDMA5_QM:
8514 		index = 5;
8515 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8516 		qman_base = mmDCORE2_EDMA1_QM_BASE;
8517 		break;
8518 	case GAUDI2_EVENT_HDMA6_QM:
8519 		index = 6;
8520 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8521 		qman_base = mmDCORE3_EDMA0_QM_BASE;
8522 		break;
8523 	case GAUDI2_EVENT_HDMA7_QM:
8524 		index = 7;
8525 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8526 		qman_base = mmDCORE3_EDMA1_QM_BASE;
8527 		break;
8528 	case GAUDI2_EVENT_PDMA0_QM:
8529 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8530 		qman_base = mmPDMA0_QM_BASE;
8531 		break;
8532 	case GAUDI2_EVENT_PDMA1_QM:
8533 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8534 		qman_base = mmPDMA1_QM_BASE;
8535 		break;
8536 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8537 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8538 		qman_base = mmROT0_QM_BASE;
8539 		break;
8540 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8541 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8542 		qman_base = mmROT1_QM_BASE;
8543 		break;
8544 	default:
8545 		return 0;
8546 	}
8547 
8548 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8549 
8550 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8551 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8552 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8553 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8554 	}
8555 
8556 	hl_check_for_glbl_errors(hdev);
8557 
8558 	return error_count;
8559 }
8560 
8561 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8562 {
8563 	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8564 
8565 	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8566 		sts_clr_val = 0;
8567 		sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8568 				(arc_farm * ARC_FARM_OFFSET));
8569 
8570 		for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8571 			if (sts_val & BIT(i)) {
8572 				gaudi2_print_event(hdev, event_type, true,
8573 						"ARC FARM ARC %u err cause: %s",
8574 						arc_farm, gaudi2_arc_sei_error_cause[i]);
8575 				sts_clr_val |= BIT(i);
8576 				error_count++;
8577 			}
8578 		}
8579 		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8580 				sts_clr_val);
8581 	}
8582 
8583 	hl_check_for_glbl_errors(hdev);
8584 
8585 	return error_count;
8586 }
8587 
8588 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8589 {
8590 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8591 
8592 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8593 
8594 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8595 		if (sts_val & BIT(i)) {
8596 			gaudi2_print_event(hdev, event_type, true,
8597 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8598 			sts_clr_val |= BIT(i);
8599 			error_count++;
8600 		}
8601 	}
8602 
8603 	hl_check_for_glbl_errors(hdev);
8604 
8605 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8606 
8607 	return error_count;
8608 }
8609 
8610 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8611 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8612 					u64 *event_mask)
8613 {
8614 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8615 	u32 error_count = 0;
8616 	int i;
8617 
8618 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8619 		if (intr_cause_data & BIT(i)) {
8620 			gaudi2_print_event(hdev, event_type, true,
8621 				"err cause: %s", guadi2_rot_error_cause[i]);
8622 			error_count++;
8623 		}
8624 
8625 	/* check if RAZWI happened */
8626 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8627 	hl_check_for_glbl_errors(hdev);
8628 
8629 	return error_count;
8630 }
8631 
8632 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev,  u8 tpc_index, u16 event_type,
8633 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8634 					u64 *event_mask)
8635 {
8636 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8637 	u32 error_count = 0;
8638 	int i;
8639 
8640 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8641 		if (intr_cause_data & BIT(i)) {
8642 			gaudi2_print_event(hdev, event_type, true,
8643 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8644 			error_count++;
8645 		}
8646 
8647 	/* check if RAZWI happened */
8648 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8649 	hl_check_for_glbl_errors(hdev);
8650 
8651 	return error_count;
8652 }
8653 
8654 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8655 					u64 *event_mask)
8656 {
8657 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8658 	int i;
8659 
8660 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8661 		/* DCORE DEC */
8662 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8663 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8664 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8665 	else
8666 		/* PCIE DEC */
8667 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8668 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8669 
8670 	sts_val = RREG32(sts_addr);
8671 
8672 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8673 		if (sts_val & BIT(i)) {
8674 			gaudi2_print_event(hdev, event_type, true,
8675 				"err cause: %s", gaudi2_dec_error_cause[i]);
8676 			sts_clr_val |= BIT(i);
8677 			error_count++;
8678 		}
8679 	}
8680 
8681 	/* check if RAZWI happened */
8682 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8683 	hl_check_for_glbl_errors(hdev);
8684 
8685 	/* Write 1 clear errors */
8686 	WREG32(sts_addr, sts_clr_val);
8687 
8688 	return error_count;
8689 }
8690 
8691 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8692 					u64 *event_mask)
8693 {
8694 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8695 	int i;
8696 
8697 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8698 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8699 
8700 	sts_val = RREG32(sts_addr);
8701 
8702 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8703 		if (sts_val & BIT(i)) {
8704 			gaudi2_print_event(hdev, event_type, true,
8705 				"err cause: %s", guadi2_mme_error_cause[i]);
8706 			sts_clr_val |= BIT(i);
8707 			error_count++;
8708 		}
8709 	}
8710 
8711 	/* check if RAZWI happened */
8712 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8713 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8714 
8715 	hl_check_for_glbl_errors(hdev);
8716 
8717 	WREG32(sts_clr_addr, sts_clr_val);
8718 
8719 	return error_count;
8720 }
8721 
8722 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8723 					u64 intr_cause_data)
8724 {
8725 	int i, error_count = 0;
8726 
8727 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8728 		if (intr_cause_data & BIT(i)) {
8729 			gaudi2_print_event(hdev, event_type, true,
8730 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8731 			error_count++;
8732 		}
8733 
8734 	hl_check_for_glbl_errors(hdev);
8735 
8736 	return error_count;
8737 }
8738 
8739 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8740 					u64 *event_mask)
8741 {
8742 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8743 	int i;
8744 
8745 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8746 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8747 
8748 	sts_val = RREG32(sts_addr);
8749 
8750 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8751 		if (sts_val & BIT(i)) {
8752 			gaudi2_print_event(hdev, event_type, true,
8753 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8754 			sts_clr_val |= BIT(i);
8755 			error_count++;
8756 		}
8757 	}
8758 
8759 	/* check if RAZWI happened on WAP0/1 */
8760 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8761 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8762 	hl_check_for_glbl_errors(hdev);
8763 
8764 	WREG32(sts_clr_addr, sts_clr_val);
8765 
8766 	return error_count;
8767 }
8768 
8769 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8770 					u64 intr_cause_data)
8771 {
8772 	u32 error_count = 0;
8773 	int i;
8774 
8775 	/* If an AXI read or write error is received, an error is reported and
8776 	 * interrupt message is sent. Due to an HW errata, when reading the cause
8777 	 * register of the KDMA engine, the reported error is always HBW even if
8778 	 * the actual error caused by a LBW KDMA transaction.
8779 	 */
8780 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8781 		if (intr_cause_data & BIT(i)) {
8782 			gaudi2_print_event(hdev, event_type, true,
8783 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8784 			error_count++;
8785 		}
8786 
8787 	hl_check_for_glbl_errors(hdev);
8788 
8789 	return error_count;
8790 }
8791 
8792 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, int sts_addr)
8793 {
8794 	u32 error_count = 0, sts_val = RREG32(sts_addr);
8795 	int i;
8796 
8797 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8798 		if (sts_val & BIT(i)) {
8799 			gaudi2_print_event(hdev, event_type, true,
8800 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8801 			error_count++;
8802 		}
8803 
8804 	hl_check_for_glbl_errors(hdev);
8805 
8806 	return error_count;
8807 }
8808 
8809 static int gaudi2_handle_pdma_core_event(struct hl_device *hdev, u16 event_type, int pdma_idx)
8810 {
8811 	u32 sts_addr;
8812 
8813 	sts_addr = mmPDMA0_CORE_ERR_CAUSE + pdma_idx * PDMA_OFFSET;
8814 	return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
8815 }
8816 
8817 static int gaudi2_handle_edma_core_event(struct hl_device *hdev, u16 event_type, int edma_idx)
8818 {
8819 	static const int edma_event_index_map[] = {2, 3, 0, 1, 6, 7, 4, 5};
8820 	u32 sts_addr, index;
8821 
8822 	index = edma_event_index_map[edma_idx];
8823 
8824 	sts_addr = mmDCORE0_EDMA0_CORE_ERR_CAUSE +
8825 				DCORE_OFFSET * (index / NUM_OF_EDMA_PER_DCORE) +
8826 				DCORE_EDMA_OFFSET * (index % NUM_OF_EDMA_PER_DCORE);
8827 	return gaudi2_handle_dma_core_event(hdev, event_type, sts_addr);
8828 }
8829 
8830 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8831 {
8832 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8833 
8834 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8835 	if (RREG32(razwi_happened_addr)) {
8836 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8837 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8838 		WREG32(razwi_happened_addr, 0x1);
8839 	}
8840 
8841 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8842 	if (RREG32(razwi_happened_addr)) {
8843 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8844 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8845 		WREG32(razwi_happened_addr, 0x1);
8846 	}
8847 
8848 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8849 	if (RREG32(razwi_happened_addr)) {
8850 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8851 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8852 		WREG32(razwi_happened_addr, 0x1);
8853 	}
8854 
8855 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8856 	if (RREG32(razwi_happened_addr)) {
8857 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8858 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8859 		WREG32(razwi_happened_addr, 0x1);
8860 	}
8861 }
8862 
8863 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8864 					u64 intr_cause_data, u64 *event_mask)
8865 {
8866 	u32 error_count = 0;
8867 	int i;
8868 
8869 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8870 		if (!(intr_cause_data & BIT_ULL(i)))
8871 			continue;
8872 
8873 		gaudi2_print_event(hdev, event_type, true,
8874 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8875 		error_count++;
8876 
8877 		switch (intr_cause_data & BIT_ULL(i)) {
8878 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8879 			hl_check_for_glbl_errors(hdev);
8880 			break;
8881 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8882 			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8883 			break;
8884 		}
8885 	}
8886 
8887 	return error_count;
8888 }
8889 
8890 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8891 				u64 intr_cause_data)
8892 
8893 {
8894 	u32 error_count = 0;
8895 	int i;
8896 
8897 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8898 		if (intr_cause_data & BIT_ULL(i)) {
8899 			gaudi2_print_event(hdev, event_type, true,
8900 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8901 			error_count++;
8902 		}
8903 	}
8904 
8905 	return error_count;
8906 }
8907 
8908 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8909 {
8910 	u32 error_count = 0;
8911 	int i;
8912 
8913 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8914 		if (intr_cause_data & BIT_ULL(i)) {
8915 			gaudi2_print_event(hdev, event_type, true,
8916 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8917 			error_count++;
8918 		}
8919 	}
8920 
8921 	return error_count;
8922 }
8923 
8924 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8925 					u64 *event_mask)
8926 {
8927 	u32 valid, val;
8928 	u64 addr;
8929 
8930 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8931 
8932 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8933 		return;
8934 
8935 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8936 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8937 	addr <<= 32;
8938 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8939 
8940 	if (!is_pmmu)
8941 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8942 
8943 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8944 				is_pmmu ? "PMMU" : "HMMU", addr);
8945 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8946 
8947 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8948 }
8949 
8950 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8951 {
8952 	u32 valid, val;
8953 	u64 addr;
8954 
8955 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8956 
8957 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8958 		return;
8959 
8960 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8961 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8962 	addr <<= 32;
8963 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8964 
8965 	if (!is_pmmu)
8966 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8967 
8968 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8969 				is_pmmu ? "PMMU" : "HMMU", addr);
8970 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8971 }
8972 
8973 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8974 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8975 {
8976 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8977 	int i;
8978 
8979 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8980 
8981 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8982 		if (spi_sei_cause & BIT(i)) {
8983 			gaudi2_print_event(hdev, event_type, true,
8984 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8985 
8986 			if (i == 0)
8987 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8988 			else if (i == 1)
8989 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8990 
8991 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8992 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8993 
8994 			error_count++;
8995 		}
8996 	}
8997 
8998 	/* Clear cause */
8999 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
9000 
9001 	/* Clear interrupt */
9002 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
9003 
9004 	return error_count;
9005 }
9006 
9007 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9008 {
9009 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9010 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9011 	int i;
9012 
9013 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9014 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9015 
9016 	sei_cause_val = RREG32(sei_cause_addr);
9017 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9018 	cq_intr_val = RREG32(cq_intr_addr);
9019 
9020 	/* SEI interrupt */
9021 	if (sei_cause_cause) {
9022 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
9023 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
9024 					sei_cause_val);
9025 
9026 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9027 			if (!(sei_cause_cause & BIT(i)))
9028 				continue;
9029 
9030 			gaudi2_print_event(hdev, event_type, true,
9031 				"err cause: %s. %s: 0x%X",
9032 				gaudi2_sm_sei_cause[i].cause_name,
9033 				gaudi2_sm_sei_cause[i].log_name,
9034 				sei_cause_log);
9035 			error_count++;
9036 			break;
9037 		}
9038 
9039 		/* Clear SM_SEI_CAUSE */
9040 		WREG32(sei_cause_addr, 0);
9041 	}
9042 
9043 	/* CQ interrupt */
9044 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9045 		cq_intr_queue_index =
9046 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
9047 					cq_intr_val);
9048 
9049 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9050 				sm_index, cq_intr_queue_index);
9051 		error_count++;
9052 
9053 		/* Clear CQ_INTR */
9054 		WREG32(cq_intr_addr, 0);
9055 	}
9056 
9057 	hl_check_for_glbl_errors(hdev);
9058 
9059 	return error_count;
9060 }
9061 
9062 static u64 get_hmmu_base(u16 event_type)
9063 {
9064 	u8 dcore, index_in_dcore;
9065 
9066 	switch (event_type) {
9067 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9068 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9069 		dcore = 0;
9070 		index_in_dcore = 0;
9071 	break;
9072 	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9073 	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9074 		dcore = 1;
9075 		index_in_dcore = 0;
9076 	break;
9077 	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9078 	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9079 		dcore = 0;
9080 		index_in_dcore = 1;
9081 	break;
9082 	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9083 	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9084 		dcore = 1;
9085 		index_in_dcore = 1;
9086 	break;
9087 	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9088 	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9089 		dcore = 3;
9090 		index_in_dcore = 2;
9091 	break;
9092 	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9093 	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9094 		dcore = 2;
9095 		index_in_dcore = 2;
9096 	break;
9097 	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9098 	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9099 		dcore = 3;
9100 		index_in_dcore = 3;
9101 	break;
9102 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9103 	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9104 		dcore = 2;
9105 		index_in_dcore = 3;
9106 	break;
9107 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9108 	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9109 		dcore = 0;
9110 		index_in_dcore = 2;
9111 	break;
9112 	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9113 	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9114 		dcore = 1;
9115 		index_in_dcore = 2;
9116 	break;
9117 	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9118 	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9119 		dcore = 0;
9120 		index_in_dcore = 3;
9121 	break;
9122 	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9123 	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9124 		dcore = 1;
9125 		index_in_dcore = 3;
9126 	break;
9127 	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9128 	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9129 		dcore = 3;
9130 		index_in_dcore = 0;
9131 	break;
9132 	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9133 	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9134 		dcore = 2;
9135 		index_in_dcore = 0;
9136 	break;
9137 	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9138 	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9139 		dcore = 3;
9140 		index_in_dcore = 1;
9141 	break;
9142 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9143 	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9144 		dcore = 2;
9145 		index_in_dcore = 1;
9146 	break;
9147 	default:
9148 		return ULONG_MAX;
9149 	}
9150 
9151 	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9152 }
9153 
9154 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9155 {
9156 	bool is_pmmu = false;
9157 	u32 error_count = 0;
9158 	u64 mmu_base;
9159 
9160 	switch (event_type) {
9161 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9162 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9163 		mmu_base = get_hmmu_base(event_type);
9164 		break;
9165 
9166 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9167 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9168 		is_pmmu = true;
9169 		mmu_base = mmPMMU_HBW_MMU_BASE;
9170 		break;
9171 	default:
9172 		return 0;
9173 	}
9174 
9175 	if (mmu_base == ULONG_MAX)
9176 		return 0;
9177 
9178 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9179 							is_pmmu, event_mask);
9180 	hl_check_for_glbl_errors(hdev);
9181 
9182 	return error_count;
9183 }
9184 
9185 
9186 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9187 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9188 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9189 {
9190 	u32 addr, beat, beat_shift;
9191 	bool rc = false;
9192 
9193 	dev_err_ratelimited(hdev->dev,
9194 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9195 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9196 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9197 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9198 
9199 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9200 	dev_err_ratelimited(hdev->dev,
9201 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9202 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9203 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9204 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9205 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9206 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9207 
9208 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
9209 	for (beat = 0 ; beat < 4 ; beat++) {
9210 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9211 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9212 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9213 						beat,
9214 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9215 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9216 
9217 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9218 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9219 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9220 						beat,
9221 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9222 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9223 			rc |= true;
9224 		}
9225 
9226 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9227 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9228 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9229 			dev_err_ratelimited(hdev->dev,
9230 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9231 					beat,
9232 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9233 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9234 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9235 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9236 			rc |= true;
9237 		}
9238 
9239 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9240 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9241 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9242 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9243 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9244 	}
9245 
9246 	return rc;
9247 }
9248 
9249 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9250 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9251 {
9252 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9253 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9254 
9255 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9256 
9257 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9258 				derr & 0x3, derr & 0xc);
9259 
9260 	/* JIRA H6-3286 - the following prints may not be valid */
9261 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9262 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9263 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9264 		dev_err_ratelimited(hdev->dev,
9265 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9266 				i,
9267 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9268 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9269 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9270 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9271 	}
9272 }
9273 
9274 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9275 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9276 {
9277 	__le32 *col_cmd = ca_par_err_data->dbg_col;
9278 	__le16 *row_cmd = ca_par_err_data->dbg_row;
9279 	u32 i;
9280 
9281 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9282 
9283 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9284 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9285 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9286 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9287 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9288 }
9289 
9290 /* Returns true if hard reset is needed or false otherwise */
9291 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9292 					struct hl_eq_hbm_sei_data *sei_data)
9293 {
9294 	bool require_hard_reset = false;
9295 	u32 hbm_id, mc_id, cause_idx;
9296 
9297 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9298 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9299 
9300 	cause_idx = sei_data->hdr.sei_cause;
9301 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9302 		gaudi2_print_event(hdev, event_type, true,
9303 			"err cause: %s",
9304 			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
9305 		return true;
9306 	}
9307 
9308 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9309 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9310 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9311 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9312 		hbm_mc_sei_cause[cause_idx]);
9313 
9314 	/* Print error-specific info */
9315 	switch (cause_idx) {
9316 	case HBM_SEI_CATTRIP:
9317 		require_hard_reset = true;
9318 		break;
9319 
9320 	case  HBM_SEI_CMD_PARITY_EVEN:
9321 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9322 						le32_to_cpu(sei_data->hdr.cnt));
9323 		require_hard_reset = true;
9324 		break;
9325 
9326 	case  HBM_SEI_CMD_PARITY_ODD:
9327 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9328 						le32_to_cpu(sei_data->hdr.cnt));
9329 		require_hard_reset = true;
9330 		break;
9331 
9332 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9333 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9334 						le32_to_cpu(sei_data->hdr.cnt));
9335 		require_hard_reset = true;
9336 		break;
9337 
9338 	case HBM_SEI_READ_ERR:
9339 		/* Unlike other SEI events, read error requires further processing of the
9340 		 * raw data in order to determine the root cause.
9341 		 */
9342 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9343 								&sei_data->read_err_info,
9344 								le32_to_cpu(sei_data->hdr.cnt));
9345 		break;
9346 
9347 	default:
9348 		break;
9349 	}
9350 
9351 	require_hard_reset |= !!sei_data->hdr.is_critical;
9352 
9353 	return require_hard_reset;
9354 }
9355 
9356 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9357 				u64 intr_cause_data)
9358 {
9359 	if (intr_cause_data) {
9360 		gaudi2_print_event(hdev, event_type, true,
9361 			"temperature error cause: %#llx", intr_cause_data);
9362 		return 1;
9363 	}
9364 
9365 	return 0;
9366 }
9367 
9368 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9369 {
9370 	u32 i, error_count = 0;
9371 
9372 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9373 		if (intr_cause_data & hbm_mc_spi[i].mask) {
9374 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9375 				hbm_mc_spi[i].cause);
9376 			error_count++;
9377 		}
9378 
9379 	return error_count;
9380 }
9381 
9382 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9383 {
9384 	ktime_t zero_time = ktime_set(0, 0);
9385 
9386 	mutex_lock(&hdev->clk_throttling.lock);
9387 
9388 	switch (event_type) {
9389 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9390 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9391 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9392 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9393 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9394 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9395 		break;
9396 
9397 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9398 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9399 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9400 		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
9401 		break;
9402 
9403 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9404 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9405 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9406 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9407 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9408 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9409 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9410 		break;
9411 
9412 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9413 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9414 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9415 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9416 		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
9417 		break;
9418 
9419 	default:
9420 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9421 		break;
9422 	}
9423 
9424 	mutex_unlock(&hdev->clk_throttling.lock);
9425 }
9426 
9427 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9428 					struct cpucp_pkt_sync_err *sync_err)
9429 {
9430 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9431 
9432 	gaudi2_print_event(hdev, event_type, false,
9433 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9434 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9435 		q->pi, atomic_read(&q->ci));
9436 }
9437 
9438 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9439 {
9440 	u32 p2p_intr, msix_gw_intr, error_count = 0;
9441 
9442 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9443 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9444 
9445 	if (p2p_intr) {
9446 		gaudi2_print_event(hdev, event_type, true,
9447 			"pcie p2p transaction terminated due to security, req_id(0x%x)",
9448 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9449 
9450 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9451 		error_count++;
9452 	}
9453 
9454 	if (msix_gw_intr) {
9455 		gaudi2_print_event(hdev, event_type, true,
9456 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9457 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9458 
9459 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9460 		error_count++;
9461 	}
9462 
9463 	return error_count;
9464 }
9465 
9466 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9467 			struct hl_eq_pcie_drain_ind_data *drain_data)
9468 {
9469 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9470 
9471 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9472 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9473 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9474 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9475 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9476 
9477 	if (cause & BIT_ULL(0)) {
9478 		dev_err_ratelimited(hdev->dev,
9479 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9480 			!!lbw_rd, !!lbw_wr);
9481 		error_count++;
9482 	}
9483 
9484 	if (cause & BIT_ULL(1)) {
9485 		dev_err_ratelimited(hdev->dev,
9486 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9487 			hbw_rd, hbw_wr);
9488 		error_count++;
9489 	}
9490 
9491 	return error_count;
9492 }
9493 
9494 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9495 {
9496 	u32 error_count = 0;
9497 	int i;
9498 
9499 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9500 		if (intr_cause_data & BIT_ULL(i)) {
9501 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9502 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
9503 			error_count++;
9504 		}
9505 	}
9506 
9507 	hl_check_for_glbl_errors(hdev);
9508 
9509 	return error_count;
9510 }
9511 
9512 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9513 					struct cpucp_pkt_sync_err *sync_err)
9514 {
9515 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9516 
9517 	gaudi2_print_event(hdev, event_type, false,
9518 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9519 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9520 }
9521 
9522 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9523 					struct hl_eq_engine_arc_intr_data *data)
9524 {
9525 	struct hl_engine_arc_dccm_queue_full_irq *q;
9526 	u32 intr_type, engine_id;
9527 	u64 payload;
9528 
9529 	intr_type = le32_to_cpu(data->intr_type);
9530 	engine_id = le32_to_cpu(data->engine_id);
9531 	payload = le64_to_cpu(data->payload);
9532 
9533 	switch (intr_type) {
9534 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9535 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9536 
9537 		gaudi2_print_event(hdev, event_type, true,
9538 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9539 				engine_id, intr_type, q->queue_index);
9540 		return 1;
9541 	default:
9542 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9543 		return 0;
9544 	}
9545 }
9546 
9547 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9548 {
9549 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9550 	bool reset_required = false, is_critical = false;
9551 	u32 index, ctl, reset_flags = 0, error_count = 0;
9552 	u64 event_mask = 0;
9553 	u16 event_type;
9554 
9555 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9556 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9557 
9558 	if (event_type >= GAUDI2_EVENT_SIZE) {
9559 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9560 				event_type, GAUDI2_EVENT_SIZE - 1);
9561 		return;
9562 	}
9563 
9564 	gaudi2->events_stat[event_type]++;
9565 	gaudi2->events_stat_aggregate[event_type]++;
9566 
9567 	switch (event_type) {
9568 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9569 		fallthrough;
9570 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9571 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9572 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9573 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9574 		is_critical = eq_entry->ecc_data.is_critical;
9575 		error_count++;
9576 		break;
9577 
9578 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9579 		fallthrough;
9580 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9581 		fallthrough;
9582 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9583 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9584 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9585 		break;
9586 
9587 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9588 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9589 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9590 		break;
9591 
9592 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9593 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9594 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9595 		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9596 		break;
9597 
9598 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9599 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9600 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9601 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9602 		break;
9603 
9604 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9605 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9606 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9607 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9608 					&eq_entry->razwi_with_intr_cause, &event_mask);
9609 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9610 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9611 		break;
9612 
9613 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9614 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9615 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9616 						&eq_entry->razwi_with_intr_cause, &event_mask);
9617 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9618 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9619 		break;
9620 
9621 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9622 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9623 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9624 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9625 		break;
9626 
9627 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9628 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9629 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9630 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9631 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9632 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9633 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9634 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9635 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9636 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9637 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9638 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9639 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9640 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9641 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9642 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9643 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9644 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9645 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9646 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9647 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9648 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9649 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9650 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9651 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9652 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9653 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9654 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9655 					&eq_entry->razwi_with_intr_cause, &event_mask);
9656 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9657 		break;
9658 
9659 	case GAUDI2_EVENT_DEC0_SPI:
9660 	case GAUDI2_EVENT_DEC1_SPI:
9661 	case GAUDI2_EVENT_DEC2_SPI:
9662 	case GAUDI2_EVENT_DEC3_SPI:
9663 	case GAUDI2_EVENT_DEC4_SPI:
9664 	case GAUDI2_EVENT_DEC5_SPI:
9665 	case GAUDI2_EVENT_DEC6_SPI:
9666 	case GAUDI2_EVENT_DEC7_SPI:
9667 	case GAUDI2_EVENT_DEC8_SPI:
9668 	case GAUDI2_EVENT_DEC9_SPI:
9669 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9670 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9671 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9672 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9673 		break;
9674 
9675 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9676 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9677 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9678 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9679 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9680 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9681 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9682 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9683 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9684 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9685 		break;
9686 
9687 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9688 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9689 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9690 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9691 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9692 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9693 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9694 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9695 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9696 		break;
9697 
9698 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9699 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9700 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9701 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9702 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9703 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9704 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9705 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9706 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9707 		break;
9708 
9709 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9710 	case GAUDI2_EVENT_KDMA0_CORE:
9711 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9712 					le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9713 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9714 		break;
9715 
9716 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9717 		index = event_type - GAUDI2_EVENT_HDMA2_CORE;
9718 		error_count = gaudi2_handle_edma_core_event(hdev, event_type, index);
9719 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9720 		break;
9721 
9722 	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9723 		index = event_type - GAUDI2_EVENT_PDMA0_CORE;
9724 		error_count = gaudi2_handle_pdma_core_event(hdev, event_type, index);
9725 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9726 		break;
9727 
9728 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9729 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9730 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9731 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9732 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9733 		break;
9734 
9735 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9736 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9737 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9738 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9739 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9740 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9741 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9742 		break;
9743 
9744 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9745 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9746 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9747 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9748 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9749 		break;
9750 
9751 	case GAUDI2_EVENT_PMMU_FATAL_0:
9752 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9753 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9754 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9755 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9756 		break;
9757 
9758 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9759 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9760 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9761 		break;
9762 
9763 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9764 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9765 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9766 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9767 			reset_required = true;
9768 		}
9769 		error_count++;
9770 		break;
9771 
9772 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9773 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9774 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9775 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9776 		break;
9777 
9778 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9779 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9780 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9781 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9782 		break;
9783 
9784 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9785 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9786 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9787 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9788 		break;
9789 
9790 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9791 		error_count = gaudi2_handle_psoc_drain(hdev,
9792 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9793 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9794 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9795 		break;
9796 
9797 	case GAUDI2_EVENT_CPU_AXI_ECC:
9798 		error_count = GAUDI2_NA_EVENT_CAUSE;
9799 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9800 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9801 		break;
9802 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9803 		error_count = GAUDI2_NA_EVENT_CAUSE;
9804 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9805 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9806 		break;
9807 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9808 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9809 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9810 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9811 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9812 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9813 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9814 		break;
9815 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9816 		error_count = GAUDI2_NA_EVENT_CAUSE;
9817 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9818 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9819 		break;
9820 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9821 		error_count = GAUDI2_NA_EVENT_CAUSE;
9822 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9823 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9824 		break;
9825 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9826 		error_count = GAUDI2_NA_EVENT_CAUSE;
9827 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9828 		break;
9829 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9830 		error_count = GAUDI2_NA_EVENT_CAUSE;
9831 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9832 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9833 		break;
9834 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9835 		error_count = GAUDI2_NA_EVENT_CAUSE;
9836 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9837 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9838 		break;
9839 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9840 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9841 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9842 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9843 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9844 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9845 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9846 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9847 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9848 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9849 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9850 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9851 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9852 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9853 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9854 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9855 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9856 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9857 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9858 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9859 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9860 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9861 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9862 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9863 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9864 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9865 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9866 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9867 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9868 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9869 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9870 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9871 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9872 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9873 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9874 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9875 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9876 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9877 		fallthrough;
9878 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9879 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9880 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9881 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9882 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9883 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9884 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9885 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9886 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9887 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9888 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9889 		error_count = GAUDI2_NA_EVENT_CAUSE;
9890 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9891 		break;
9892 
9893 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9894 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9895 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9896 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9897 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9898 		error_count = GAUDI2_NA_EVENT_CAUSE;
9899 		break;
9900 
9901 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9902 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9903 		error_count = GAUDI2_NA_EVENT_CAUSE;
9904 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9905 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9906 		break;
9907 
9908 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9909 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9910 		error_count = GAUDI2_NA_EVENT_CAUSE;
9911 		/* Do nothing- FW will handle it */
9912 		break;
9913 
9914 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9915 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9916 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9917 		break;
9918 
9919 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9920 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9921 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9922 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9923 		break;
9924 
9925 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9926 		error_count = GAUDI2_NA_EVENT_CAUSE;
9927 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9928 		break;
9929 
9930 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9931 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9932 						le64_to_cpu(eq_entry->data[0]));
9933 		error_count = GAUDI2_NA_EVENT_CAUSE;
9934 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9935 		break;
9936 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9937 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9938 						le64_to_cpu(eq_entry->data[0]));
9939 		error_count = GAUDI2_NA_EVENT_CAUSE;
9940 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9941 		break;
9942 
9943 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9944 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9945 		error_count = GAUDI2_NA_EVENT_CAUSE;
9946 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9947 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9948 		break;
9949 
9950 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9951 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9952 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9953 		break;
9954 
9955 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9956 	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9957 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9958 		error_count = GAUDI2_NA_EVENT_CAUSE;
9959 		is_critical = true;
9960 		break;
9961 
9962 	default:
9963 		if (gaudi2_irq_map_table[event_type].valid) {
9964 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9965 						event_type);
9966 			error_count = GAUDI2_NA_EVENT_CAUSE;
9967 		}
9968 	}
9969 
9970 	/* Make sure to dump an error in case no error cause was printed so far.
9971 	 * Note that although we have counted the errors, we use this number as
9972 	 * a boolean.
9973 	 */
9974 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9975 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9976 	else if (error_count == 0)
9977 		gaudi2_print_event(hdev, event_type, true,
9978 				"No error cause for H/W event %u", event_type);
9979 
9980 	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9981 				reset_required) {
9982 		if (reset_required ||
9983 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9984 			reset_flags |= HL_DRV_RESET_HARD;
9985 
9986 		if (hdev->hard_reset_on_fw_events ||
9987 				(hdev->asic_prop.fw_security_enabled && is_critical))
9988 			goto reset_device;
9989 	}
9990 
9991 	/* Send unmask irq only for interrupts not classified as MSG */
9992 	if (!gaudi2_irq_map_table[event_type].msg)
9993 		hl_fw_unmask_irq(hdev, event_type);
9994 
9995 	if (event_mask)
9996 		hl_notifier_event_send_all(hdev, event_mask);
9997 
9998 	return;
9999 
10000 reset_device:
10001 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
10002 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10003 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10004 	} else {
10005 		reset_flags |= HL_DRV_RESET_DELAY;
10006 	}
10007 	/* escalate general hw errors to critical/fatal error */
10008 	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10009 		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10010 
10011 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10012 	hl_device_cond_reset(hdev, reset_flags, event_mask);
10013 }
10014 
10015 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10016 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
10017 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
10018 {
10019 	u32 ctl, pkt_size;
10020 	int rc = 0;
10021 
10022 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10023 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10024 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10025 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10026 
10027 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
10028 	lin_dma_pkt->src_addr = cpu_to_le64(val);
10029 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10030 	lin_dma_pkt->tsize = cpu_to_le32(size);
10031 
10032 	pkt_size = sizeof(struct packet_lin_dma);
10033 
10034 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
10035 	if (rc)
10036 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
10037 				hw_queue_id);
10038 
10039 	return rc;
10040 }
10041 
10042 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10043 {
10044 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10045 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10046 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10047 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10048 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10049 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
10050 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10051 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10052 	void *lin_dma_pkts_arr;
10053 	dma_addr_t pkt_dma_addr;
10054 	int rc = 0, dma_num = 0;
10055 
10056 	if (prop->edma_enabled_mask == 0) {
10057 		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
10058 		return -EIO;
10059 	}
10060 
10061 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10062 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10063 	comp_addr = CFG_BASE + sob_addr;
10064 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10065 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10066 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10067 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10068 
10069 	/* Calculate how many lin dma pkts we'll need */
10070 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10071 	pkt_size = sizeof(struct packet_lin_dma);
10072 
10073 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
10074 					&pkt_dma_addr, GFP_KERNEL);
10075 	if (!lin_dma_pkts_arr)
10076 		return -ENOMEM;
10077 
10078 	/*
10079 	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
10080 	 * only the first one to restore later
10081 	 * also set the sob addr for all edma cores for completion.
10082 	 * set QM as trusted to allow it to access physical address with MMU bp.
10083 	 */
10084 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10085 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10086 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10087 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10088 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10089 
10090 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10091 				continue;
10092 
10093 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10094 					edma_offset, mmubp);
10095 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10096 					lower_32_bits(comp_addr));
10097 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10098 					upper_32_bits(comp_addr));
10099 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10100 					comp_val);
10101 			gaudi2_qman_set_test_mode(hdev,
10102 					edma_queues_id[dcore] + 4 * edma_idx, true);
10103 		}
10104 	}
10105 
10106 	WREG32(sob_addr, 0);
10107 
10108 	while (cur_addr < end_addr) {
10109 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10110 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10111 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10112 
10113 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10114 					continue;
10115 
10116 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10117 
10118 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10119 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10120 					pkt_dma_addr + dma_num * pkt_size,
10121 					edma_queues_id[dcore] + edma_idx * 4,
10122 					chunk_size, cur_addr, val);
10123 				if (rc)
10124 					goto end;
10125 
10126 				dma_num++;
10127 				cur_addr += chunk_size;
10128 				if (cur_addr == end_addr)
10129 					break;
10130 			}
10131 		}
10132 	}
10133 
10134 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10135 	if (rc) {
10136 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10137 		goto end;
10138 	}
10139 end:
10140 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10141 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10142 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10143 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10144 
10145 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10146 				continue;
10147 
10148 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10149 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10150 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10151 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10152 			gaudi2_qman_set_test_mode(hdev,
10153 					edma_queues_id[dcore] + 4 * edma_idx, false);
10154 		}
10155 	}
10156 
10157 	WREG32(sob_addr, 0);
10158 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10159 
10160 	return rc;
10161 }
10162 
10163 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10164 {
10165 	int rc;
10166 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10167 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
10168 
10169 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10170 
10171 	if (rc)
10172 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10173 				prop->dram_user_base_address, size);
10174 	return rc;
10175 }
10176 
10177 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10178 {
10179 	int rc;
10180 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10181 	u64 val = hdev->memory_scrub_val;
10182 	u64 addr, size;
10183 
10184 	if (!hdev->memory_scrub)
10185 		return 0;
10186 
10187 	/* scrub SRAM */
10188 	addr = prop->sram_user_base_address;
10189 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10190 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10191 			addr, addr + size, val);
10192 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10193 	if (rc) {
10194 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10195 		return rc;
10196 	}
10197 
10198 	/* scrub DRAM */
10199 	rc = gaudi2_scrub_device_dram(hdev, val);
10200 	if (rc) {
10201 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10202 		return rc;
10203 	}
10204 	return 0;
10205 }
10206 
/*
 * gaudi2_restore_user_sm_registers() - reset the user part of the sync manager registers.
 * @hdev: habanalabs device structure.
 *
 * Three register groups are handled, each through gaudi2_memset_device_lbw()
 * (presumably a "write this value to the whole range" helper - it is defined
 * elsewhere, confirm its exact semantics):
 *  - the CQ registers (LBW address low/high, LBW data, CQ base low/high, CQ size)
 *  - the monitor status and monitor config registers
 *  - the sync objects (SOBs)
 *
 * For dcore 0 every range starts at the first available CQ/monitor/SOB index
 * taken from asic_prop. For dcores 1..NUM_OF_DCORES-1 the ranges start at
 * index 0.
 */
static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
{
	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
	u32 val, size, offset;
	int dcore_id;

	/* Byte offset of the first available CQ (index * 4) */
	offset = hdev->asic_prop.first_available_cq[0] * 4;
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
	/*
	 * Distance from the first available LBW_ADDR_L entry to LBW_ADDR_H_0.
	 * The same size is used for all six CQ register groups below.
	 */
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);

	/* memset dcore0 CQ registers */
	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

	/* The other dcores start at entry 0, so the range is not shortened by offset */
	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;

	/* memset the CQ registers of dcores 1 and up */
	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);

		/* Advance all addresses to the next dcore */
		cq_lbw_l_addr += DCORE_OFFSET;
		cq_lbw_h_addr += DCORE_OFFSET;
		cq_lbw_data_addr += DCORE_OFFSET;
		cq_base_l_addr += DCORE_OFFSET;
		cq_base_h_addr += DCORE_OFFSET;
		cq_size_addr += DCORE_OFFSET;
	}

	/* Byte offset of the first available user monitor (index * 4) */
	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
	/* Monitor status registers are written with only the PROT bit set */
	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);

	/* memset dcore0 monitors */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	/* Monitor config registers are zeroed, using the same offset and size */
	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
	gaudi2_memset_device_lbw(hdev, addr, size, 0);

	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;

	/* memset the monitors of dcores 1 and up, starting at monitor 0 */
	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
		mon_sts_addr += DCORE_OFFSET;
		mon_cfg_addr += DCORE_OFFSET;
	}

	/* Byte offset of the first available user SOB (index * 4) */
	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
	val = 0;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);

	/* memset dcore0 sobs */
	gaudi2_memset_device_lbw(hdev, addr, size, val);

	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;

	/* memset the sobs of dcores 1 and up, starting at SOB 0 */
	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
		gaudi2_memset_device_lbw(hdev, addr, size, val);
		addr += DCORE_OFFSET;
	}

	/* Flush all WREG to prevent race - the value that is read back is not used */
	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
}
10298 
10299 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10300 {
10301 	u32 reg_base, hw_queue_id;
10302 
10303 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10304 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10305 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10306 			continue;
10307 
10308 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10309 
10310 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10311 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10312 	}
10313 
10314 	/* Flush all WREG to prevent race */
10315 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10316 }
10317 
10318 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10319 {
10320 	u32 reg_base, hw_queue_id;
10321 
10322 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10323 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10324 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10325 			continue;
10326 
10327 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10328 
10329 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10330 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10331 	}
10332 
10333 	/* Flush all WREG to prevent race */
10334 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10335 }
10336 
/*
 * gaudi2_context_switch() - context switch hook; nothing is done here.
 * @hdev: habanalabs device structure (unused).
 * @asid: address space ID (unused).
 *
 * Return: always 0.
 */
static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
10341 
/*
 * gaudi2_restore_phase_topology() - intentionally empty hook.
 * @hdev: habanalabs device structure (unused).
 */
static void gaudi2_restore_phase_topology(struct hl_device *hdev)
{
}
10345 
10346 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10347 						struct dup_block_ctx *cfg_ctx)
10348 {
10349 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10350 	u8 seq;
10351 	int i;
10352 
10353 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
10354 		seq = block_idx * cfg_ctx->instances + i;
10355 
10356 		/* skip disabled instance */
10357 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10358 			continue;
10359 
10360 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10361 					cfg_ctx->data);
10362 	}
10363 }
10364 
10365 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10366 						u64 mask)
10367 {
10368 	int i;
10369 
10370 	cfg_ctx->enabled_mask = mask;
10371 
10372 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
10373 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
10374 }
10375 
/*
 * gaudi2_init_blocks() - configure all instances of all blocks.
 * @hdev: habanalabs device structure.
 * @cfg_ctx: configuration context, passed on to gaudi2_init_blocks_with_mask().
 *
 * Uses a mask of U64_MAX, i.e. no instance is filtered out by the mask.
 */
void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
{
	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
}
10380 
10381 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10382 {
10383 	void *host_mem_virtual_addr;
10384 	dma_addr_t host_mem_dma_addr;
10385 	u64 reserved_va_base;
10386 	u32 pos, size_left, size_to_dma;
10387 	struct hl_ctx *ctx;
10388 	int rc = 0;
10389 
10390 	/* Fetch the ctx */
10391 	ctx = hl_get_compute_ctx(hdev);
10392 	if (!ctx) {
10393 		dev_err(hdev->dev, "No ctx available\n");
10394 		return -EINVAL;
10395 	}
10396 
10397 	/* Allocate buffers for read and for poll */
10398 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10399 								GFP_KERNEL | __GFP_ZERO);
10400 	if (host_mem_virtual_addr == NULL) {
10401 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10402 		rc = -ENOMEM;
10403 		goto put_ctx;
10404 	}
10405 
10406 	/* Reserve VM region on asic side */
10407 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10408 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10409 	if (!reserved_va_base) {
10410 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10411 		rc = -ENOMEM;
10412 		goto free_data_buffer;
10413 	}
10414 
10415 	/* Create mapping on asic side */
10416 	mutex_lock(&hdev->mmu_lock);
10417 
10418 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10419 	if (rc) {
10420 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10421 		goto unreserve_va;
10422 	}
10423 
10424 	rc = hl_mmu_invalidate_cache_range(hdev, false,
10425 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10426 				      ctx->asid, reserved_va_base, SZ_2M);
10427 	if (rc) {
10428 		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10429 		goto unreserve_va;
10430 	}
10431 
10432 	mutex_unlock(&hdev->mmu_lock);
10433 
10434 	/* Enable MMU on KDMA */
10435 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10436 
10437 	pos = 0;
10438 	size_left = size;
10439 	size_to_dma = SZ_2M;
10440 
10441 	while (size_left > 0) {
10442 		if (size_left < SZ_2M)
10443 			size_to_dma = size_left;
10444 
10445 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10446 		if (rc)
10447 			break;
10448 
10449 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10450 
10451 		if (size_left <= SZ_2M)
10452 			break;
10453 
10454 		pos += SZ_2M;
10455 		addr += SZ_2M;
10456 		size_left -= SZ_2M;
10457 	}
10458 
10459 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10460 
10461 	mutex_lock(&hdev->mmu_lock);
10462 
10463 	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10464 	if (rc)
10465 		goto unreserve_va;
10466 
10467 	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10468 				      ctx->asid, reserved_va_base, SZ_2M);
10469 
10470 unreserve_va:
10471 	mutex_unlock(&hdev->mmu_lock);
10472 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10473 free_data_buffer:
10474 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10475 put_ctx:
10476 	hl_ctx_put(ctx);
10477 
10478 	return rc;
10479 }
10480 
10481 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10482 {
10483 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10484 	int min_alloc_order, rc;
10485 
10486 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10487 		return 0;
10488 
10489 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10490 								HOST_SPACE_INTERNAL_CB_SZ,
10491 								&hdev->internal_cb_pool_dma_addr,
10492 								GFP_KERNEL | __GFP_ZERO);
10493 
10494 	if (!hdev->internal_cb_pool_virt_addr)
10495 		return -ENOMEM;
10496 
10497 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10498 					gaudi2_get_wait_cb_size(hdev)));
10499 
10500 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10501 	if (!hdev->internal_cb_pool) {
10502 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
10503 		rc = -ENOMEM;
10504 		goto free_internal_cb_pool;
10505 	}
10506 
10507 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10508 				HOST_SPACE_INTERNAL_CB_SZ, -1);
10509 	if (rc) {
10510 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10511 		rc = -EFAULT;
10512 		goto destroy_internal_cb_pool;
10513 	}
10514 
10515 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10516 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10517 
10518 	if (!hdev->internal_cb_va_base) {
10519 		rc = -ENOMEM;
10520 		goto destroy_internal_cb_pool;
10521 	}
10522 
10523 	mutex_lock(&hdev->mmu_lock);
10524 
10525 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10526 					HOST_SPACE_INTERNAL_CB_SZ);
10527 	if (rc)
10528 		goto unreserve_internal_cb_pool;
10529 
10530 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10531 	if (rc)
10532 		goto unmap_internal_cb_pool;
10533 
10534 	mutex_unlock(&hdev->mmu_lock);
10535 
10536 	return 0;
10537 
10538 unmap_internal_cb_pool:
10539 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10540 unreserve_internal_cb_pool:
10541 	mutex_unlock(&hdev->mmu_lock);
10542 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10543 destroy_internal_cb_pool:
10544 	gen_pool_destroy(hdev->internal_cb_pool);
10545 free_internal_cb_pool:
10546 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10547 					hdev->internal_cb_pool_dma_addr);
10548 
10549 	return rc;
10550 }
10551 
10552 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10553 {
10554 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10555 
10556 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10557 		return;
10558 
10559 	mutex_lock(&hdev->mmu_lock);
10560 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10561 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10562 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10563 	mutex_unlock(&hdev->mmu_lock);
10564 
10565 	gen_pool_destroy(hdev->internal_cb_pool);
10566 
10567 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10568 					hdev->internal_cb_pool_dma_addr);
10569 }
10570 
/*
 * gaudi2_restore_user_registers() - restore the user sync manager and QM registers.
 * @hdev: habanalabs device structure.
 *
 * The sync manager registers are handled first, then the QM registers.
 */
static void gaudi2_restore_user_registers(struct hl_device *hdev)
{
	gaudi2_restore_user_sm_registers(hdev);
	gaudi2_restore_user_qm_registers(hdev);
}
10576 
10577 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10578 {
10579 	struct hl_device *hdev = ctx->hdev;
10580 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10581 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10582 	int rc;
10583 
10584 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10585 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10586 	if (rc)
10587 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10588 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10589 
10590 	return rc;
10591 }
10592 
10593 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10594 {
10595 	struct hl_device *hdev = ctx->hdev;
10596 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10597 	int rc;
10598 
10599 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10600 				prop->pmmu.page_size, true);
10601 	if (rc)
10602 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10603 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10604 }
10605 
10606 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10607 {
10608 	int rc;
10609 
10610 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10611 	if (rc)
10612 		return rc;
10613 
10614 	/* No need to clear user registers if the device has just
10615 	 * performed reset, we restore only nic qm registers
10616 	 */
10617 	if (ctx->hdev->reset_upon_device_release)
10618 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10619 	else
10620 		gaudi2_restore_user_registers(ctx->hdev);
10621 
10622 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10623 	if (rc)
10624 		return rc;
10625 
10626 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10627 	if (rc)
10628 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10629 
10630 	return rc;
10631 }
10632 
10633 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10634 {
10635 	if (ctx->asid == HL_KERNEL_ASID_ID)
10636 		return;
10637 
10638 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10639 
10640 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10641 }
10642 
10643 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10644 {
10645 	struct hl_device *hdev = cs->ctx->hdev;
10646 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10647 	u32 mon_payload, sob_id, mon_id;
10648 
10649 	if (!cs_needs_completion(cs))
10650 		return 0;
10651 
10652 	/*
10653 	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
10654 	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
10655 	 * cyclic index. The SOB value is increased when each of the CS jobs is
10656 	 * completed. When the SOB reaches the number of CS jobs, the monitor
10657 	 * generates MSI-X interrupt.
10658 	 */
10659 
10660 	sob_id = mon_id = index;
10661 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10662 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10663 
10664 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10665 				cs->jobs_cnt);
10666 
10667 	return 0;
10668 }
10669 
/*
 * gaudi2_get_queue_id_for_cq() - queue lookup by CQ index; no queue is ever reported.
 * @hdev: habanalabs device structure (unused).
 * @cq_idx: CQ index (unused).
 *
 * Return: always HL_INVALID_QUEUE.
 */
static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return HL_INVALID_QUEUE;
}
10674 
10675 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10676 {
10677 	struct hl_cb *cb = data;
10678 	struct packet_msg_short *pkt;
10679 	u32 value, ctl, pkt_size = sizeof(*pkt);
10680 
10681 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10682 	memset(pkt, 0, pkt_size);
10683 
10684 	/* Inc by 1, Mode ADD */
10685 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10686 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10687 
10688 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10689 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10690 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10691 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10692 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10693 
10694 	pkt->value = cpu_to_le32(value);
10695 	pkt->ctl = cpu_to_le32(ctl);
10696 
10697 	return size + pkt_size;
10698 }
10699 
10700 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10701 {
10702 	u32 ctl, pkt_size = sizeof(*pkt);
10703 
10704 	memset(pkt, 0, pkt_size);
10705 
10706 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10707 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10708 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10709 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10710 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10711 
10712 	pkt->value = cpu_to_le32(value);
10713 	pkt->ctl = cpu_to_le32(ctl);
10714 
10715 	return pkt_size;
10716 }
10717 
10718 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10719 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10720 {
10721 	u32 ctl, value, pkt_size = sizeof(*pkt);
10722 	u8 mask;
10723 
10724 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10725 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10726 		return 0;
10727 	}
10728 
10729 	memset(pkt, 0, pkt_size);
10730 
10731 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10732 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10733 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
10734 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10735 
10736 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10737 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10738 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10739 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10740 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10741 
10742 	pkt->value = cpu_to_le32(value);
10743 	pkt->ctl = cpu_to_le32(ctl);
10744 
10745 	return pkt_size;
10746 }
10747 
10748 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10749 {
10750 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10751 
10752 	memset(pkt, 0, pkt_size);
10753 
10754 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10755 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10756 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10757 
10758 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10759 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10760 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10761 
10762 	pkt->cfg = cpu_to_le32(cfg);
10763 	pkt->ctl = cpu_to_le32(ctl);
10764 
10765 	return pkt_size;
10766 }
10767 
10768 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10769 {
10770 	struct hl_cb *cb = prop->data;
10771 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10772 
10773 	u64 monitor_base, fence_addr = 0;
10774 	u32 stream_index, size = prop->size;
10775 	u16 msg_addr_offset;
10776 
10777 	stream_index = prop->q_idx % 4;
10778 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10779 			QM_FENCE2_OFFSET + stream_index * 4;
10780 
10781 	/*
10782 	 * monitor_base should be the content of the base0 address registers,
10783 	 * so it will be added to the msg short offsets
10784 	 */
10785 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10786 
10787 	/* First monitor config packet: low address of the sync */
10788 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10789 				monitor_base;
10790 
10791 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10792 
10793 	/* Second monitor config packet: high address of the sync */
10794 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10795 				monitor_base;
10796 
10797 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10798 
10799 	/*
10800 	 * Third monitor config packet: the payload, i.e. what to write when the
10801 	 * sync triggers
10802 	 */
10803 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10804 				monitor_base;
10805 
10806 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10807 
10808 	/* Fourth monitor config packet: bind the monitor to a sync object */
10809 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10810 
10811 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10812 						prop->sob_val, msg_addr_offset);
10813 
10814 	/* Fence packet */
10815 	size += gaudi2_add_fence_pkt(buf + size);
10816 
10817 	return size;
10818 }
10819 
10820 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10821 {
10822 	struct hl_hw_sob *hw_sob = data;
10823 
10824 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10825 
10826 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10827 
10828 	kref_init(&hw_sob->kref);
10829 }
10830 
10831 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10832 {
10833 }
10834 
10835 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10836 {
10837 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10838 
10839 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10840 }
10841 
/*
 * gaudi2_collective_wait_init_cs() - collective-wait CS initialization hook
 * @cs: command submission (unused)
 *
 * Return: always 0; no work is done.
 */
static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
{
	return 0;
}
10846 
10847 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10848 					struct hl_cs *cs, u32 wait_queue_id,
10849 					u32 collective_engine_id, u32 encaps_signal_offset)
10850 {
10851 	return -EINVAL;
10852 }
10853 
10854 /*
10855  * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
10856  *                   to DMMU page-size address (64MB) before mapping it in
10857  *                   the MMU.
10858  * The operation is performed on both the virtual and physical addresses.
10859  * for device with 6 HBMs the scramble is:
10860  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10861  *
10862  * Example:
10863  * =============================================================================
10864  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10865  * Phys address                                                     in MMU last
10866  *                                                                    HOP
10867  * =============================================================================
10868  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10869  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10870  * =============================================================================
10871  */
10872 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10873 {
10874 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10875 	u32 divisor, mod_va;
10876 	u64 div_va;
10877 
10878 	/* accept any address in the DRAM address space */
10879 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10880 									VA_HBM_SPACE_END)) {
10881 
10882 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10883 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10884 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10885 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10886 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10887 	}
10888 
10889 	return raw_addr;
10890 }
10891 
10892 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10893 {
10894 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10895 	u32 divisor, mod_va;
10896 	u64 div_va;
10897 
10898 	/* accept any address in the DRAM address space */
10899 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10900 									VA_HBM_SPACE_END)) {
10901 
10902 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10903 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10904 					PAGE_SIZE_64MB, &mod_va);
10905 
10906 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10907 					(div_va * divisor + mod_va));
10908 	}
10909 
10910 	return scrambled_addr;
10911 }
10912 
10913 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10914 {
10915 	u32 base = 0, dcore_id, dec_id;
10916 
10917 	if (core_id >= NUMBER_OF_DEC) {
10918 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10919 		goto out;
10920 	}
10921 
10922 	if (core_id < 8) {
10923 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10924 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10925 
10926 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10927 				dec_id * DCORE_VDEC_OFFSET;
10928 	} else {
10929 		/* PCIe Shared Decoder */
10930 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10931 	}
10932 out:
10933 	return base;
10934 }
10935 
10936 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10937 				u32 *block_size, u32 *block_id)
10938 {
10939 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10940 	int i;
10941 
10942 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10943 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10944 			*block_id = i;
10945 			if (block_size)
10946 				*block_size = gaudi2->mapped_blocks[i].size;
10947 			return 0;
10948 		}
10949 	}
10950 
10951 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10952 
10953 	return -EINVAL;
10954 }
10955 
10956 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10957 			u32 block_id, u32 block_size)
10958 {
10959 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10960 	u64 offset_in_bar;
10961 	u64 address;
10962 	int rc;
10963 
10964 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10965 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10966 		return -EINVAL;
10967 	}
10968 
10969 	/* we allow mapping only an entire block */
10970 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10971 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10972 		return -EINVAL;
10973 	}
10974 
10975 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10976 
10977 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10978 
10979 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10980 			VM_DONTCOPY | VM_NORESERVE);
10981 
10982 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10983 			block_size, vma->vm_page_prot);
10984 	if (rc)
10985 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10986 
10987 	return rc;
10988 }
10989 
10990 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10991 {
10992 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10993 
10994 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10995 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10996 
10997 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10998 		WREG32(irq_handler_offset,
10999 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
11000 }
11001 
11002 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
11003 {
11004 	switch (mmu_id) {
11005 	case HW_CAP_DCORE0_DMMU0:
11006 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
11007 		break;
11008 	case HW_CAP_DCORE0_DMMU1:
11009 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
11010 		break;
11011 	case HW_CAP_DCORE0_DMMU2:
11012 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
11013 		break;
11014 	case HW_CAP_DCORE0_DMMU3:
11015 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
11016 		break;
11017 	case HW_CAP_DCORE1_DMMU0:
11018 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
11019 		break;
11020 	case HW_CAP_DCORE1_DMMU1:
11021 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
11022 		break;
11023 	case HW_CAP_DCORE1_DMMU2:
11024 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
11025 		break;
11026 	case HW_CAP_DCORE1_DMMU3:
11027 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
11028 		break;
11029 	case HW_CAP_DCORE2_DMMU0:
11030 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
11031 		break;
11032 	case HW_CAP_DCORE2_DMMU1:
11033 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
11034 		break;
11035 	case HW_CAP_DCORE2_DMMU2:
11036 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
11037 		break;
11038 	case HW_CAP_DCORE2_DMMU3:
11039 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
11040 		break;
11041 	case HW_CAP_DCORE3_DMMU0:
11042 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
11043 		break;
11044 	case HW_CAP_DCORE3_DMMU1:
11045 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
11046 		break;
11047 	case HW_CAP_DCORE3_DMMU2:
11048 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
11049 		break;
11050 	case HW_CAP_DCORE3_DMMU3:
11051 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
11052 		break;
11053 	case HW_CAP_PMMU:
11054 		*mmu_base = mmPMMU_HBW_MMU_BASE;
11055 		break;
11056 	default:
11057 		return -EINVAL;
11058 	}
11059 
11060 	return 0;
11061 }
11062 
11063 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
11064 {
11065 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
11066 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11067 	u32 mmu_base;
11068 
11069 	if (!(gaudi2->hw_cap_initialized & mmu_id))
11070 		return;
11071 
11072 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
11073 		return;
11074 
11075 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
11076 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
11077 }
11078 
11079 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
11080 {
11081 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
11082 
11083 	/* check all HMMUs */
11084 	for (i = 0 ; i < num_of_hmmus ; i++) {
11085 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11086 
11087 		if (mmu_cap_mask & mmu_id)
11088 			gaudi2_ack_mmu_error(hdev, mmu_id);
11089 	}
11090 
11091 	/* check PMMU */
11092 	if (mmu_cap_mask & HW_CAP_PMMU)
11093 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11094 
11095 	return 0;
11096 }
11097 
11098 static void gaudi2_get_msi_info(__le32 *table)
11099 {
11100 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11101 }
11102 
11103 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11104 {
11105 	switch (pll_idx) {
11106 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11107 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11108 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11109 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11110 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11111 	case HL_GAUDI2_MME_PLL: return MME_PLL;
11112 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11113 	case HL_GAUDI2_IF_PLL: return IF_PLL;
11114 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11115 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11116 	case HL_GAUDI2_VID_PLL: return VID_PLL;
11117 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11118 	default: return -EINVAL;
11119 	}
11120 }
11121 
/*
 * gaudi2_gen_sync_to_engine_map() - state-dump hook, not implemented
 * @hdev: habanalabs device (unused)
 * @map: sync-to-engine map (left untouched)
 *
 * Return: always 0.
 */
static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
{
	return 0;
}
11127 
/*
 * gaudi2_monitor_valid() - state-dump hook, not implemented
 * @mon: monitor state dump (unused)
 *
 * Return: always 0.
 */
static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
{
	return 0;
}
11133 
/*
 * gaudi2_print_single_monitor() - state-dump hook, not implemented
 * @buf: output buffer (left untouched)
 * @size: size of the output buffer (left untouched)
 * @offset: offset in the output buffer (left untouched)
 * @hdev: habanalabs device (unused)
 * @mon: monitor state dump (unused)
 *
 * Return: always 0.
 */
static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev, struct hl_mon_state_dump *mon)
{
	return 0;
}
11140 
11141 
11142 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11143 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
11144 				u32 engine_id, char **buf, size_t *size, size_t *offset)
11145 {
11146 	/* Not implemented */
11147 	return 0;
11148 }
11149 
11150 
11151 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11152 	.monitor_valid = gaudi2_monitor_valid,
11153 	.print_single_monitor = gaudi2_print_single_monitor,
11154 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11155 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
11156 };
11157 
11158 static void gaudi2_state_dump_init(struct hl_device *hdev)
11159 {
11160 	/* Not implemented */
11161 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11162 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11163 }
11164 
11165 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11166 {
11167 	return 0;
11168 }
11169 
11170 static u32 *gaudi2_get_stream_master_qid_arr(void)
11171 {
11172 	return NULL;
11173 }
11174 
/*
 * gaudi2_add_device_attr() - add the device sysfs attributes
 * @hdev: habanalabs device
 * @dev_clk_attr_grp: attribute group passed to hl_sysfs_add_dev_clk_attr()
 * @dev_vrm_attr_grp: attribute group passed to hl_sysfs_add_dev_vrm_attr()
 *
 * The clock attributes are added first and the VRM attributes second.
 */
static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);

	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
}
11181 
11182 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11183 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
11184 {
11185 	struct asic_fixed_properties *prop = &hdev->asic_prop;
11186 
11187 	/* for host pages the page size must be  */
11188 	if (!is_dram_addr) {
11189 		if (page_size % mmu_prop->page_size)
11190 			goto page_size_err;
11191 
11192 		*real_page_size = mmu_prop->page_size;
11193 		return 0;
11194 	}
11195 
11196 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11197 		goto page_size_err;
11198 
11199 	/*
11200 	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
11201 	 * than DRAM page size).
11202 	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
11203 	 * this mismatch when calculating the address to place in the MMU page table.
11204 	 * (in that case also make sure that the dram_page_size is not greater than the
11205 	 * mmu page size)
11206 	 */
11207 	*real_page_size = prop->dram_page_size;
11208 
11209 	return 0;
11210 
11211 page_size_err:
11212 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11213 							page_size, mmu_prop->page_size >> 10);
11214 	return -EFAULT;
11215 }
11216 
/*
 * gaudi2_get_monitor_dump() - get a dump of the monitors
 * @hdev: habanalabs device (unused)
 * @data: output buffer (left untouched)
 *
 * Return: always -EOPNOTSUPP.
 */
static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
{
	return -EOPNOTSUPP;
}
11221 
11222 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11223 {
11224 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11225 
11226 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11227 		return 0;
11228 
11229 	return hl_fw_send_device_activity(hdev, open);
11230 }
11231 
/*
 * ASIC callback table of Gaudi2. It is installed into hdev->asic_funcs by
 * gaudi2_set_asic_funcs(). Most entries point at gaudi2_* implementations; a
 * few use common hl_* helpers, and entries set to NULL are callbacks for which
 * no Gaudi2 implementation is provided.
 */
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
	.asic_dma_map_single = gaudi2_dma_map_single,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = NULL,
	.write_pte = NULL,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	/* register access goes through the common hl_rreg()/hl_wreg() helpers */
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = &gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};
11333 
11334 void gaudi2_set_asic_funcs(struct hl_device *hdev)
11335 {
11336 	hdev->asic_funcs = &gaudi2_funcs;
11337 }
11338