xref: /openbmc/linux/drivers/accel/habanalabs/gaudi2/gaudi2.c (revision 583f12a80dfb7997d59a42e8642019695f5aa15a)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2020-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17 
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22 
23 #define GAUDI2_DMA_POOL_BLK_SIZE		SZ_256		/* 256 bytes */
24 
25 #define GAUDI2_RESET_TIMEOUT_MSEC		2000		/* 2000ms */
26 
27 #define GAUDI2_RESET_POLL_TIMEOUT_USEC		500000		/* 500ms */
28 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC		25000		/* 25s */
29 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC		25000		/* 25s */
30 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC	3000000		/* 3s */
31 #define GAUDI2_RESET_POLL_CNT			3
32 #define GAUDI2_RESET_WAIT_MSEC			1		/* 1ms */
33 #define GAUDI2_CPU_RESET_WAIT_MSEC		100		/* 100ms */
34 #define GAUDI2_PLDM_RESET_WAIT_MSEC		1000		/* 1s */
35 #define GAUDI2_CB_POOL_CB_CNT			512
36 #define GAUDI2_CB_POOL_CB_SIZE			SZ_128K		/* 128KB */
37 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC		4000000		/* 4s */
38 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC		25000000	/* 25s */
39 #define GAUDI2_TEST_QUEUE_WAIT_USEC		100000		/* 100ms */
40 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
41 
42 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT		3
43 
44 /*
45  * since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs
46  * and the code relies on that value (for array size etc..) we define another value
47  * for MAX faulty TPCs which reflects the cluster binning requirements
48  */
49 #define MAX_CLUSTER_BINNING_FAULTY_TPCS		1
50 #define MAX_FAULTY_XBARS			1
51 #define MAX_FAULTY_EDMAS			1
52 #define MAX_FAULTY_DECODERS			1
53 
54 #define GAUDI2_TPC_FULL_MASK			0x1FFFFFF
55 #define GAUDI2_HIF_HMMU_FULL_MASK		0xFFFF
56 #define GAUDI2_DECODER_FULL_MASK		0x3FF
57 
58 #define GAUDI2_NA_EVENT_CAUSE			0xFF
59 #define GAUDI2_NUM_OF_QM_ERR_CAUSE		18
60 #define GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE		25
61 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE		3
62 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE		14
63 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE		3
64 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE		2
65 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE		22
66 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE		31
67 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE		25
68 #define GAUDI2_NUM_OF_MME_ERR_CAUSE		16
69 #define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE	5
70 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE		7
71 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE	8
72 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE		19
73 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE		9
74 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE		3
75 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE	3
76 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE	2
77 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE	2
78 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE	2
79 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE		5
80 
81 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 10)
82 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC		(MMU_CONFIG_TIMEOUT_USEC * 200)
83 #define GAUDI2_ARB_WDT_TIMEOUT			(0x1000000)
84 
85 #define GAUDI2_VDEC_TIMEOUT_USEC		10000		/* 10ms */
86 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC		(GAUDI2_VDEC_TIMEOUT_USEC * 100)
87 
88 #define KDMA_TIMEOUT_USEC			USEC_PER_SEC
89 
90 #define IS_DMA_IDLE(dma_core_sts0)	\
91 	(!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
92 
93 #define IS_DMA_HALTED(dma_core_sts1)	\
94 	((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
95 
96 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
97 
98 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
99 
100 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
101 	((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
102 	(((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
103 	(((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
104 
105 #define PCIE_DEC_EN_MASK			0x300
106 #define DEC_WORK_STATE_IDLE			0
107 #define DEC_WORK_STATE_PEND			3
108 #define IS_DEC_IDLE(dec_swreg15) \
109 	(((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
110 	((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) ==  DEC_WORK_STATE_PEND)
111 
112 /* HBM MMU address scrambling parameters */
113 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE		SZ_8M
114 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT		26
115 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT		0
116 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK	DRAM_VA_HINT_MASK
117 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR	16
118 #define MMU_RANGE_INV_VA_LSB_SHIFT		12
119 #define MMU_RANGE_INV_VA_MSB_SHIFT		44
120 #define MMU_RANGE_INV_EN_SHIFT			0
121 #define MMU_RANGE_INV_ASID_EN_SHIFT		1
122 #define MMU_RANGE_INV_ASID_SHIFT		2
123 
124 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has
125  * a 2 entries FIFO, and hence it is not enabled for it.
126  */
127 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
128 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK		GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
129 
130 #define GAUDI2_MAX_STRING_LEN			64
131 
132 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
133 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
134 
135 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
136 
137 /* RAZWI initiator coordinates */
138 #define RAZWI_GET_AXUSER_XY(x) \
139 	((x & 0xF8001FF0) >> 4)
140 
141 #define RAZWI_GET_AXUSER_LOW_XY(x) \
142 	((x & 0x00001FF0) >> 4)
143 
144 #define RAZWI_INITIATOR_AXUER_L_X_SHIFT		0
145 #define RAZWI_INITIATOR_AXUER_L_X_MASK		0x1F
146 #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT		5
147 #define RAZWI_INITIATOR_AXUER_L_Y_MASK		0xF
148 
149 #define RAZWI_INITIATOR_AXUER_H_X_SHIFT		23
150 #define RAZWI_INITIATOR_AXUER_H_X_MASK		0x1F
151 
152 #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
153 	((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
154 		(((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
155 
156 #define RAZWI_INITIATOR_ID_X_HIGH(x) \
157 		(((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
158 
159 #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
160 	(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
161 
162 #define PSOC_RAZWI_ENG_STR_SIZE 128
163 #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
164 
165 struct gaudi2_razwi_info {
166 	u32 axuser_xy;
167 	u32 rtr_ctrl;
168 	u16 eng_id;
169 	char *eng_name;
170 };
171 
172 static struct gaudi2_razwi_info common_razwi_info[] = {
173 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
174 				GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
175 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
176 				GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
177 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
178 				GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
179 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
180 				GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
181 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
182 				GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
183 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
184 				GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
185 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
186 				GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
187 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
188 				GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
189 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
190 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
191 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
192 				GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC9"},
193 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
194 				GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
195 		{RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
196 				GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
197 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
198 				GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
199 		{RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
200 				GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
201 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
202 				GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
203 		{RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
204 				GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
205 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
206 				GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
207 		{RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
208 				GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
209 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
210 				GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
211 		{RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
212 				GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
213 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
214 				GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
215 		{RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
216 				GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
217 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
218 				GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
219 		{RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
220 				GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
221 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
222 				GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
223 		{RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
224 				GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
225 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
226 				GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
227 		{RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
228 				GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
229 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
230 				GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
231 		{RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
232 				GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
233 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
234 				GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
235 		{RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
236 				GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
237 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
238 				GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
239 		{RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
240 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
241 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
242 				GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
243 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
244 				GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
245 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
246 				GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
247 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
248 				GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
249 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
250 				GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
251 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
252 				GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
253 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
254 				GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
255 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
256 				GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
257 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
258 				GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
259 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
260 				GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
261 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
262 				GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
263 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
264 				GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
265 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
266 				GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
267 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
268 				GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
269 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
270 				GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
271 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
272 				GAUDI2_ENGINE_ID_SIZE, "PMMU"},
273 		{RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
274 				GAUDI2_ENGINE_ID_SIZE, "PCIE"},
275 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
276 				GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
277 		{RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
278 				GAUDI2_ENGINE_ID_KDMA, "KDMA"},
279 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
280 				GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
281 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
282 				GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
283 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
284 				GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
285 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
286 				GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
287 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
288 				GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
289 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
290 				GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
291 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
292 				GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
293 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
294 				GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
295 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
296 				GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
297 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
298 				GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
299 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 				GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
301 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 				GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
303 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 				GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
305 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 				GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
307 		{RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 				GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
309 		{RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 				GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
311 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
312 				GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
313 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
314 				GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
315 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 				GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
317 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 				GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
319 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 				GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
321 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 				GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
323 		{RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 				GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
325 		{RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 				GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
327 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
328 				GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
329 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
330 				GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
331 		{RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 				GAUDI2_ENGINE_ID_PSOC, "CPU"},
333 		{RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
334 				GAUDI2_ENGINE_ID_PSOC, "PSOC"}
335 };
336 
337 static struct gaudi2_razwi_info mme_razwi_info[] = {
338 		/* MME X high coordinate is N/A, hence using only low coordinates */
339 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
340 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
341 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
342 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
343 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
344 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
345 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
346 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
347 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
348 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
349 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
350 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
351 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
352 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
353 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
354 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
355 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
356 				GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
357 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
358 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
359 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
360 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
361 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
362 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
363 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
364 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
365 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
366 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
367 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
368 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
369 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
370 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
371 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
372 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
373 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
374 				GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
375 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
376 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
377 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
378 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
379 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
380 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
381 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
382 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
383 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
384 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
385 		{RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
386 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
387 		{RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
388 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
389 		{RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
390 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
391 		{RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
392 				GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
393 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
394 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
395 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
396 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
397 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
398 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
399 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
400 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
401 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
402 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
403 		{RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
404 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
405 		{RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
406 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
407 		{RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
408 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
409 		{RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
410 				GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
411 };
412 
413 enum hl_pmmu_fatal_cause {
414 	LATENCY_RD_OUT_FIFO_OVERRUN,
415 	LATENCY_WR_OUT_FIFO_OVERRUN,
416 };
417 
418 enum hl_pcie_drain_ind_cause {
419 	LBW_AXI_DRAIN_IND,
420 	HBW_AXI_DRAIN_IND
421 };
422 
423 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
424 	[HBM_ID0] = 0xFFFC,
425 	[HBM_ID1] = 0xFFCF,
426 	[HBM_ID2] = 0xF7F7,
427 	[HBM_ID3] = 0x7F7F,
428 	[HBM_ID4] = 0xFCFF,
429 	[HBM_ID5] = 0xCFFF,
430 };
431 
432 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
433 	[0] = HBM_ID0,
434 	[1] = HBM_ID1,
435 	[2] = HBM_ID4,
436 	[3] = HBM_ID5,
437 };
438 
439 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
440 	[EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
441 	[EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
442 	[EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
443 	[EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
444 	[EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
445 	[EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
446 	[EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
447 	[EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
448 };
449 
450 static const int gaudi2_qman_async_event_id[] = {
451 	[GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
452 	[GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
453 	[GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
454 	[GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
455 	[GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
456 	[GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
457 	[GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
458 	[GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
459 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
460 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
461 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
462 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
463 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
464 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
465 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
466 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
467 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
468 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
469 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
470 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
471 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
472 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
473 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
474 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
475 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
476 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
477 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
478 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
479 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
480 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
481 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
482 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
483 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
484 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
485 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
486 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
487 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
488 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
489 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
490 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
491 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
492 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
493 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
494 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
495 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
496 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
497 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
498 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
499 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
500 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
501 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
502 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
503 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
504 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
505 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
506 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
507 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
508 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
509 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
510 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
511 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
512 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
513 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
514 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
515 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
516 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
517 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
518 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
519 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
520 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
521 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
522 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
523 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
524 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
525 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
526 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
527 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
528 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
529 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
530 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
531 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
532 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
533 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
534 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
535 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
536 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
537 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
538 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
539 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
540 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
541 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
542 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
543 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
544 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
545 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
546 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
547 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
548 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
549 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
550 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
551 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
552 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
553 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
554 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
555 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
556 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
557 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
558 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
559 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
560 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
561 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
562 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
563 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
564 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
565 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
566 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
567 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
568 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
569 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
570 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
571 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
572 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
573 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
574 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
575 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
576 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
577 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
578 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
579 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
580 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
581 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
582 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
583 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
584 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
585 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
586 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
587 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
588 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
589 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
590 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
591 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
592 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
593 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
594 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
595 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
596 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
597 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
598 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
599 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
600 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
601 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
602 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
603 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
604 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
605 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
606 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
607 	[GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
608 	[GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
609 	[GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
610 	[GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
611 	[GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
612 	[GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
613 	[GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
614 	[GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
615 	[GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
616 	[GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
617 	[GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
618 	[GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
619 	[GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
620 	[GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
621 	[GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
622 	[GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
623 	[GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
624 	[GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
625 	[GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
626 	[GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
627 	[GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
628 	[GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
629 	[GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
630 	[GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
631 	[GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
632 	[GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
633 	[GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
634 	[GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
635 	[GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
636 	[GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
637 	[GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
638 	[GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
639 	[GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
640 	[GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
641 	[GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
642 	[GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
643 	[GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
644 	[GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
645 	[GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
646 	[GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
647 	[GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
648 	[GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
649 	[GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
650 	[GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
651 	[GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
652 	[GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
653 	[GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
654 	[GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
655 	[GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
656 	[GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
657 	[GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
658 	[GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
659 	[GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
660 	[GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
661 	[GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
662 	[GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
663 	[GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
664 	[GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
665 	[GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
666 	[GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
667 	[GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
668 	[GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
669 	[GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
670 	[GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
671 	[GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
672 	[GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
673 	[GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
674 	[GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
675 	[GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
676 	[GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
677 	[GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
678 	[GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
679 	[GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
680 	[GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
681 	[GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
682 	[GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
683 	[GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
684 	[GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
685 	[GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
686 	[GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
687 	[GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
688 	[GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
689 	[GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
690 	[GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
691 	[GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
692 	[GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
693 	[GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
694 	[GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
695 	[GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
696 	[GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
697 	[GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
698 	[GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
699 	[GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
700 	[GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
701 	[GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
702 	[GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
703 	[GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
704 	[GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
705 	[GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
706 	[GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
707 	[GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
708 	[GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
709 	[GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
710 	[GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
711 };
712 
713 static const int gaudi2_dma_core_async_event_id[] = {
714 	[DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
715 	[DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
716 	[DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
717 	[DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
718 	[DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
719 	[DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
720 	[DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
721 	[DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
722 	[DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
723 	[DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
724 	[DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
725 };
726 
727 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
728 	"qman sei intr",
729 	"arc sei intr"
730 };
731 
732 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
733 	"AXI_TERMINATOR WR",
734 	"AXI_TERMINATOR RD",
735 	"AXI SPLIT SEI Status"
736 };
737 
738 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
739 	"cbu_bresp_sei_intr_cause",
740 	"cbu_rresp_sei_intr_cause",
741 	"lbu_bresp_sei_intr_cause",
742 	"lbu_rresp_sei_intr_cause",
743 	"cbu_axi_split_intr_cause",
744 	"lbu_axi_split_intr_cause",
745 	"arc_ip_excptn_sei_intr_cause",
746 	"dmi_bresp_sei_intr_cause",
747 	"aux2apb_err_sei_intr_cause",
748 	"cfg_lbw_wr_terminated_intr_cause",
749 	"cfg_lbw_rd_terminated_intr_cause",
750 	"cfg_dccm_wr_terminated_intr_cause",
751 	"cfg_dccm_rd_terminated_intr_cause",
752 	"cfg_hbw_rd_terminated_intr_cause"
753 };
754 
755 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
756 	"msix_vcd_hbw_sei",
757 	"msix_l2c_hbw_sei",
758 	"msix_nrm_hbw_sei",
759 	"msix_abnrm_hbw_sei",
760 	"msix_vcd_lbw_sei",
761 	"msix_l2c_lbw_sei",
762 	"msix_nrm_lbw_sei",
763 	"msix_abnrm_lbw_sei",
764 	"apb_vcd_lbw_sei",
765 	"apb_l2c_lbw_sei",
766 	"apb_nrm_lbw_sei",
767 	"apb_abnrm_lbw_sei",
768 	"dec_sei",
769 	"dec_apb_sei",
770 	"trc_apb_sei",
771 	"lbw_mstr_if_sei",
772 	"axi_split_bresp_err_sei",
773 	"hbw_axi_wr_viol_sei",
774 	"hbw_axi_rd_viol_sei",
775 	"lbw_axi_wr_viol_sei",
776 	"lbw_axi_rd_viol_sei",
777 	"vcd_spi",
778 	"l2c_spi",
779 	"nrm_spi",
780 	"abnrm_spi",
781 };
782 
783 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
784 	"PQ AXI HBW error",
785 	"CQ AXI HBW error",
786 	"CP AXI HBW error",
787 	"CP error due to undefined OPCODE",
788 	"CP encountered STOP OPCODE",
789 	"CP AXI LBW error",
790 	"CP WRREG32 or WRBULK returned error",
791 	"N/A",
792 	"FENCE 0 inc over max value and clipped",
793 	"FENCE 1 inc over max value and clipped",
794 	"FENCE 2 inc over max value and clipped",
795 	"FENCE 3 inc over max value and clipped",
796 	"FENCE 0 dec under min value and clipped",
797 	"FENCE 1 dec under min value and clipped",
798 	"FENCE 2 dec under min value and clipped",
799 	"FENCE 3 dec under min value and clipped",
800 	"CPDMA Up overflow",
801 	"PQC L2H error"
802 };
803 
804 static const char * const gaudi2_qman_lower_cp_error_cause[GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE] = {
805 	"RSVD0",
806 	"CQ AXI HBW error",
807 	"CP AXI HBW error",
808 	"CP error due to undefined OPCODE",
809 	"CP encountered STOP OPCODE",
810 	"CP AXI LBW error",
811 	"CP WRREG32 or WRBULK returned error",
812 	"N/A",
813 	"FENCE 0 inc over max value and clipped",
814 	"FENCE 1 inc over max value and clipped",
815 	"FENCE 2 inc over max value and clipped",
816 	"FENCE 3 inc over max value and clipped",
817 	"FENCE 0 dec under min value and clipped",
818 	"FENCE 1 dec under min value and clipped",
819 	"FENCE 2 dec under min value and clipped",
820 	"FENCE 3 dec under min value and clipped",
821 	"CPDMA Up overflow",
822 	"RSVD17",
823 	"CQ_WR_IFIFO_CI_ERR",
824 	"CQ_WR_CTL_CI_ERR",
825 	"ARC_CQF_RD_ERR",
826 	"ARC_CQ_WR_IFIFO_CI_ERR",
827 	"ARC_CQ_WR_CTL_CI_ERR",
828 	"ARC_AXI_ERR",
829 	"CP_SWITCH_WDT_ERR"
830 };
831 
832 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
833 	"Choice push while full error",
834 	"Choice Q watchdog error",
835 	"MSG AXI LBW returned with error"
836 };
837 
838 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
839 	"qm_axi_err",
840 	"qm_trace_fence_events",
841 	"qm_sw_err",
842 	"qm_cp_sw_stop",
843 	"lbw_mstr_rresp_err",
844 	"lbw_mstr_bresp_err",
845 	"lbw_msg_slverr",
846 	"hbw_msg_slverr",
847 	"wbc_slverr",
848 	"hbw_mstr_rresp_err",
849 	"hbw_mstr_bresp_err",
850 	"sb_resp_intr",
851 	"mrsb_resp_intr",
852 	"core_dw_status_0",
853 	"core_dw_status_1",
854 	"core_dw_status_2",
855 	"core_dw_status_3",
856 	"core_dw_status_4",
857 	"core_dw_status_5",
858 	"core_dw_status_6",
859 	"core_dw_status_7",
860 	"async_arc2cpu_sei_intr",
861 };
862 
863 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
864 	"tpc_address_exceed_slm",
865 	"tpc_div_by_0",
866 	"tpc_spu_mac_overflow",
867 	"tpc_spu_addsub_overflow",
868 	"tpc_spu_abs_overflow",
869 	"tpc_spu_fma_fp_dst_nan",
870 	"tpc_spu_fma_fp_dst_inf",
871 	"tpc_spu_convert_fp_dst_nan",
872 	"tpc_spu_convert_fp_dst_inf",
873 	"tpc_spu_fp_dst_denorm",
874 	"tpc_vpu_mac_overflow",
875 	"tpc_vpu_addsub_overflow",
876 	"tpc_vpu_abs_overflow",
877 	"tpc_vpu_convert_fp_dst_nan",
878 	"tpc_vpu_convert_fp_dst_inf",
879 	"tpc_vpu_fma_fp_dst_nan",
880 	"tpc_vpu_fma_fp_dst_inf",
881 	"tpc_vpu_fp_dst_denorm",
882 	"tpc_assertions",
883 	"tpc_illegal_instruction",
884 	"tpc_pc_wrap_around",
885 	"tpc_qm_sw_err",
886 	"tpc_hbw_rresp_err",
887 	"tpc_hbw_bresp_err",
888 	"tpc_lbw_rresp_err",
889 	"tpc_lbw_bresp_err",
890 	"st_unlock_already_locked",
891 	"invalid_lock_access",
892 	"LD_L protection violation",
893 	"ST_L protection violation",
894 	"D$ L0CS mismatch",
895 };
896 
897 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
898 	"agu_resp_intr",
899 	"qman_axi_err",
900 	"wap sei (wbc axi err)",
901 	"arc sei",
902 	"cfg access error",
903 	"qm_sw_err",
904 	"sbte_dbg_intr_0",
905 	"sbte_dbg_intr_1",
906 	"sbte_dbg_intr_2",
907 	"sbte_dbg_intr_3",
908 	"sbte_dbg_intr_4",
909 	"sbte_prtn_intr_0",
910 	"sbte_prtn_intr_1",
911 	"sbte_prtn_intr_2",
912 	"sbte_prtn_intr_3",
913 	"sbte_prtn_intr_4",
914 };
915 
916 static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = {
917 	"i0",
918 	"i1",
919 	"i2",
920 	"i3",
921 	"i4",
922 };
923 
924 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
925 	"WBC ERR RESP_0",
926 	"WBC ERR RESP_1",
927 	"AP SOURCE POS INF",
928 	"AP SOURCE NEG INF",
929 	"AP SOURCE NAN",
930 	"AP RESULT POS INF",
931 	"AP RESULT NEG INF",
932 };
933 
934 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
935 	"HBW Read returned with error RRESP",
936 	"HBW write returned with error BRESP",
937 	"LBW write returned with error BRESP",
938 	"descriptor_fifo_overflow",
939 	"KDMA SB LBW Read returned with error",
940 	"KDMA WBC LBW Write returned with error",
941 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
942 	"WRONG CFG FOR COMMIT IN LIN DMA"
943 };
944 
945 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
946 	"HBW/LBW Read returned with error RRESP",
947 	"HBW/LBW write returned with error BRESP",
948 	"LBW write returned with error BRESP",
949 	"descriptor_fifo_overflow",
950 	"KDMA SB LBW Read returned with error",
951 	"KDMA WBC LBW Write returned with error",
952 	"TRANSPOSE ENGINE DESC FIFO OVERFLOW",
953 	"WRONG CFG FOR COMMIT IN LIN DMA"
954 };
955 
956 struct gaudi2_sm_sei_cause_data {
957 	const char *cause_name;
958 	const char *log_name;
959 };
960 
961 static const struct gaudi2_sm_sei_cause_data
962 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
963 	{"calculated SO value overflow/underflow", "SOB ID"},
964 	{"payload address of monitor is not aligned to 4B", "monitor addr"},
965 	{"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
966 };
967 
968 static const char * const
969 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
970 	"LATENCY_RD_OUT_FIFO_OVERRUN",
971 	"LATENCY_WR_OUT_FIFO_OVERRUN",
972 };
973 
974 static const char * const
975 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
976 	"LATENCY_RD_OUT_FIFO_OVERRUN",
977 	"LATENCY_WR_OUT_FIFO_OVERRUN",
978 };
979 
980 static const char * const
981 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
982 	"AXI drain HBW",
983 	"AXI drain LBW",
984 };
985 
986 static const char * const
987 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
988 	"HBW error response",
989 	"LBW error response",
990 	"TLP is blocked by RR"
991 };
992 
993 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
994 	[GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
995 	[GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
996 	[GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
997 	[GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
998 	[GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
999 	[GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1000 	[GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1001 	[GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1002 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1003 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1004 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1005 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1006 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1007 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1008 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1009 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1010 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1011 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1012 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1013 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1014 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1015 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1016 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1017 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1018 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1019 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1020 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1021 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1022 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1023 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1024 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1025 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1026 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1027 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1028 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1029 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1030 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1031 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1032 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1033 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1034 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1035 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1036 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1037 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1038 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1039 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1040 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1041 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1042 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1043 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1044 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1045 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1046 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1047 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1048 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1049 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1050 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1051 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1052 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1053 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1054 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1055 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1056 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1057 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1058 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1059 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1060 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1061 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1062 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1063 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1064 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1065 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1066 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1067 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1068 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1069 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1070 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1071 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1072 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1073 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1074 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1075 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1076 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1077 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1078 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1079 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1080 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1081 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1082 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1083 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1084 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1085 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1086 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1087 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1088 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1089 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1090 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1091 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1092 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1093 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1094 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1095 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1096 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1097 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1098 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1099 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1100 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1101 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1102 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1103 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1104 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1105 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1106 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1107 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1108 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1109 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1110 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1111 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1112 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1113 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1114 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1115 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1116 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1117 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1118 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1119 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1120 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1121 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1122 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1123 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1124 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1125 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1126 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1127 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1128 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1129 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1130 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1131 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1132 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1133 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1134 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1135 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1136 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1137 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1138 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1139 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1140 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1141 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1142 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1143 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1144 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1145 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1146 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1147 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1148 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1149 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1150 	[GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1151 	[GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1152 	[GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1153 	[GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1154 	[GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1155 	[GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1156 	[GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1157 	[GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1158 	[GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1159 	[GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1160 	[GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1161 	[GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1162 	[GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1163 	[GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1164 	[GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1165 	[GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1166 	[GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1167 	[GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1168 	[GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1169 	[GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1170 	[GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1171 	[GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1172 	[GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1173 	[GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1174 	[GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1175 	[GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1176 	[GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1177 	[GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1178 	[GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1179 	[GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1180 	[GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1181 	[GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1182 	[GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1183 	[GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1184 	[GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1185 	[GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1186 	[GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1187 	[GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1188 	[GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1189 	[GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1190 	[GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1191 	[GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1192 	[GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1193 	[GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1194 	[GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1195 	[GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1196 	[GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1197 	[GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1198 	[GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1199 	[GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1200 	[GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1201 	[GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1202 	[GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1203 	[GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1204 	[GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1205 	[GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1206 	[GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1207 	[GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1208 	[GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1209 	[GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1210 	[GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1211 	[GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1212 	[GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1213 	[GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1214 	[GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1215 	[GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1216 	[GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1217 	[GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1218 	[GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1219 	[GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1220 	[GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1221 	[GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1222 	[GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1223 	[GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1224 	[GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1225 	[GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1226 	[GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1227 	[GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1228 	[GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1229 	[GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1230 	[GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1231 	[GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1232 	[GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1233 	[GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1234 	[GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1235 	[GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1236 	[GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1237 	[GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1238 	[GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1239 	[GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1240 	[GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1241 	[GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1242 	[GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1243 	[GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1244 	[GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1245 	[GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1246 	[GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1247 	[GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1248 	[GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1249 	[GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1250 	[GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1251 	[GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1252 	[GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1253 	[GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1254 };
1255 
1256 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1257 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1258 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1259 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1260 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1261 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1262 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1263 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1264 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1265 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1266 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1267 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1268 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1269 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1270 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1271 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1272 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1273 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1274 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1275 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1276 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1277 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1278 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1279 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1280 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1281 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1282 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1283 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1284 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1285 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1286 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1287 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1288 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1289 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1290 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1291 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1292 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1293 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1294 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1295 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1296 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1297 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1298 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1299 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1300 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1301 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1302 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1303 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1304 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1305 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1306 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1307 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1308 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1309 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1310 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1311 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1312 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1313 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1314 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1315 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1316 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1317 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1318 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1319 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1320 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1321 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1322 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1323 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1324 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1325 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1326 };
1327 
1328 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1329 	[CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1330 	[CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1331 	[CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1332 	[CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1333 	[CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1334 	[CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1335 	[CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1336 	[CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1337 	[CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1338 	[CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1339 	[CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1340 	[CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1341 	[CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1342 	[CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1343 	[CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1344 	[CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1345 	[CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1346 	[CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1347 	[CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1348 	[CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1349 	[CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1350 	[CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1351 	[CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1352 	[CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1353 	[CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1354 	[CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1355 	[CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1356 	[CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1357 	[CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1358 	[CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1359 	[CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1360 	[CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1361 	[CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1362 	[CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1363 	[CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1364 	[CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1365 	[CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1366 	[CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1367 	[CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1368 	[CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1369 	[CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1370 	[CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1371 	[CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1372 	[CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1373 	[CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1374 	[CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1375 	[CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1376 	[CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1377 	[CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1378 	[CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1379 	[CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1380 	[CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1381 	[CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1382 	[CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1383 	[CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1384 	[CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1385 	[CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1386 	[CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1387 	[CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1388 	[CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1389 	[CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1390 	[CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1391 	[CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1392 	[CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1393 	[CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1394 	[CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1395 	[CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1396 	[CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1397 	[CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1398 };
1399 
1400 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1401 	[MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1402 	[MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1403 	[MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1404 	[MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1405 };
1406 
1407 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1408 	[GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1409 	[GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1410 	[GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1411 	[GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1412 	[GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1413 	[GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1414 	[GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1415 	[GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1416 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1417 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1418 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1419 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1420 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1421 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1422 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1423 	[GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1424 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1425 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1426 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1427 	[GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1428 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1429 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1430 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1431 	[GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1432 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1433 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1434 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1435 	[GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1436 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1437 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1438 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1439 	[GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1440 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1441 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1442 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1443 	[GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1444 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1445 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1446 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1447 	[GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1448 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1449 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1450 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1451 	[GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1452 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1453 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1454 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1455 	[GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1456 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1457 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1458 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1459 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1460 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1461 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1462 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1463 	[GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1464 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1465 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1466 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1467 	[GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1468 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1469 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1470 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1471 	[GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1472 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1473 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1474 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1475 	[GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1476 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1477 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1478 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1479 	[GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1480 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1481 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1482 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1483 	[GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1484 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1485 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1486 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1487 	[GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1488 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1489 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1490 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1491 	[GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1492 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1493 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1494 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1495 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1496 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1497 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1498 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1499 	[GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1500 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1501 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1502 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1503 	[GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1504 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1505 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1506 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1507 	[GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1508 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1509 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1510 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1511 	[GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1512 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1513 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1514 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1515 	[GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1516 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1517 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1518 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1519 	[GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1520 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1521 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1522 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1523 	[GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1524 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1525 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1526 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1527 	[GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1528 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1529 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1530 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1531 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1532 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1533 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1534 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1535 	[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1536 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1537 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1538 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1539 	[GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1540 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1541 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1542 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1543 	[GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1544 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1545 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1546 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1547 	[GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1548 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1549 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1550 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1551 	[GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1552 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1553 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1554 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1555 	[GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1556 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1557 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1558 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1559 	[GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1560 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1561 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1562 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1563 	[GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1564 	[GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1565 	[GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1566 	[GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1567 	[GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1568 	[GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1569 	[GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1570 	[GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1571 	[GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1572 	[GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1573 	[GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1574 	[GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1575 	[GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1576 	[GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1577 	[GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1578 	[GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1579 	[GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1580 	[GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1581 	[GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1582 	[GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1583 	[GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1584 	[GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1585 	[GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1586 	[GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1587 	[GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1588 	[GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1589 	[GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1590 	[GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1591 	[GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1592 	[GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1593 	[GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1594 	[GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1595 	[GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1596 	[GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1597 	[GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1598 	[GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1599 	[GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1600 	[GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1601 	[GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1602 	[GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1603 	[GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1604 	[GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1605 	[GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1606 	[GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1607 	[GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1608 	[GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1609 	[GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1610 	[GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1611 	[GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1612 	[GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1613 	[GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1614 	[GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1615 	[GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1616 	[GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1617 	[GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1618 	[GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1619 	[GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1620 	[GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1621 	[GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1622 	[GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1623 	[GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1624 	[GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1625 	[GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1626 	[GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1627 	[GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1628 	[GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1629 	[GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1630 	[GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1631 	[GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1632 	[GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1633 	[GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1634 	[GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1635 	[GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1636 	[GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1637 	[GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1638 	[GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1639 	[GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1640 	[GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1641 	[GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1642 	[GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1643 	[GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1644 	[GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1645 	[GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1646 	[GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1647 	[GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1648 	[GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1649 	[GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1650 	[GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1651 	[GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1652 	[GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1653 	[GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1654 	[GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1655 	[GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1656 	[GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1657 	[GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1658 	[GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1659 	[GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1660 	[GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1661 	[GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1662 	[GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1663 	[GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1664 	[GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1665 	[GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1666 	[GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1667 	[GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1668 };
1669 
1670 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1671 	[DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1672 	[DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1673 	[DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1674 	[DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1675 	[DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1676 	[DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1677 	[DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1678 	[DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1679 	[DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1680 	[DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1681 	[DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1682 };
1683 
1684 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1685 	[MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1686 	[MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1687 	[MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1688 	[MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1689 };
1690 
1691 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1692 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1693 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1694 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1695 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1696 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1697 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1698 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1699 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1700 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1701 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1702 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1703 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1704 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1705 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1706 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1707 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1708 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1709 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1710 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1711 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1712 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1713 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1714 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1715 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1716 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1717 };
1718 
1719 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1720 	[TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1721 	[TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1722 	[TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1723 	[TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1724 	[TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1725 	[TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1726 	[TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1727 	[TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1728 	[TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1729 	[TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1730 	[TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1731 	[TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1732 	[TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1733 	[TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1734 	[TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1735 	[TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1736 	[TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1737 	[TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1738 	[TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1739 	[TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1740 	[TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1741 	[TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1742 	[TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1743 	[TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1744 	[TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1745 };
1746 
1747 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1748 	[ROTATOR_ID_0] = mmROT0_BASE,
1749 	[ROTATOR_ID_1] = mmROT1_BASE
1750 };
1751 
1752 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1753 	[TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1754 	[TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1755 	[TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1756 	[TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1757 	[TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1758 	[TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1759 	[TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1760 	[TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1761 	[TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1762 	[TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1763 	[TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1764 	[TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1765 	[TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1766 	[TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1767 	[TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1768 	[TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1769 	[TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1770 	[TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1771 	[TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1772 	[TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1773 	[TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1774 	[TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1775 	[TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1776 	[TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1777 	[TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1778 };
1779 
1780 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1781 	[ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1782 	[ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1783 };
1784 
1785 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1786 	[GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1787 	[GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1788 	[GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1789 	[GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1790 	[GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1791 	[GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1792 	[GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1793 	[GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1794 	[GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1795 	[GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1796 	[GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1797 	[GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1798 	[GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1799 	[GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1800 	[GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1801 	[GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1802 	[GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1803 	[GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1804 	[GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1805 	[GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1806 	[GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1807 	[GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1808 	[GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1809 	[GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1810 	/* the PCI TPC is placed last (mapped liked HW) */
1811 	[GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1812 };
1813 
1814 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1815 	[GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1816 	[GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1817 	[GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1818 	[GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1819 };
1820 
1821 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1822 	[GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1823 	[GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1824 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1825 	[GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1826 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1827 	[GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1828 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1829 	[GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1830 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1831 	[GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1832 	[GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1833 };
1834 
1835 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1836 	GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1837 	GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1838 	GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1839 	GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1840 	GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1841 	GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1842 	GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1843 	GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1844 };
1845 
1846 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1847 	"gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1848 	"gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1849 	"gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1850 	"gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1851 	"gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1852 	"gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1853 	"gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1854 	"gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1855 	"gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1856 	"gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1857 };
1858 
1859 enum rtr_id {
1860 	DCORE0_RTR0,
1861 	DCORE0_RTR1,
1862 	DCORE0_RTR2,
1863 	DCORE0_RTR3,
1864 	DCORE0_RTR4,
1865 	DCORE0_RTR5,
1866 	DCORE0_RTR6,
1867 	DCORE0_RTR7,
1868 	DCORE1_RTR0,
1869 	DCORE1_RTR1,
1870 	DCORE1_RTR2,
1871 	DCORE1_RTR3,
1872 	DCORE1_RTR4,
1873 	DCORE1_RTR5,
1874 	DCORE1_RTR6,
1875 	DCORE1_RTR7,
1876 	DCORE2_RTR0,
1877 	DCORE2_RTR1,
1878 	DCORE2_RTR2,
1879 	DCORE2_RTR3,
1880 	DCORE2_RTR4,
1881 	DCORE2_RTR5,
1882 	DCORE2_RTR6,
1883 	DCORE2_RTR7,
1884 	DCORE3_RTR0,
1885 	DCORE3_RTR1,
1886 	DCORE3_RTR2,
1887 	DCORE3_RTR3,
1888 	DCORE3_RTR4,
1889 	DCORE3_RTR5,
1890 	DCORE3_RTR6,
1891 	DCORE3_RTR7,
1892 };
1893 
1894 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1895 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1896 	DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1897 	DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1898 	DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
1899 	DCORE0_RTR0
1900 };
1901 
1902 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1903 	DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
1904 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
1905 	DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
1906 	DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
1907 	DCORE0_RTR0
1908 };
1909 
1910 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
1911 	DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
1912 	DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
1913 };
1914 
1915 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
1916 	DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
1917 	DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
1918 };
1919 
1920 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1921 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1922 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1923 };
1924 
1925 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
1926 	DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
1927 	DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
1928 };
1929 
1930 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1931 	mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1932 	mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1933 	mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1934 	mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1935 	mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1936 	mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
1937 	mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
1938 	mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
1939 };
1940 
1941 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
1942 	DCORE0_RTR0, DCORE0_RTR0
1943 };
1944 
1945 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
1946 	DCORE0_RTR2, DCORE0_RTR2
1947 };
1948 
1949 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
1950 	DCORE2_RTR0, DCORE3_RTR7
1951 };
1952 
1953 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
1954 	DCORE2_RTR2, DCORE3_RTR5
1955 };
1956 
1957 struct mme_initiators_rtr_id {
1958 	u32 wap0;
1959 	u32 wap1;
1960 	u32 write;
1961 	u32 read;
1962 	u32 sbte0;
1963 	u32 sbte1;
1964 	u32 sbte2;
1965 	u32 sbte3;
1966 	u32 sbte4;
1967 };
1968 
1969 enum mme_initiators {
1970 	MME_WAP0 = 0,
1971 	MME_WAP1,
1972 	MME_WRITE,
1973 	MME_READ,
1974 	MME_SBTE0,
1975 	MME_SBTE1,
1976 	MME_SBTE2,
1977 	MME_SBTE3,
1978 	MME_SBTE4,
1979 	MME_INITIATORS_MAX
1980 };
1981 
1982 static const struct mme_initiators_rtr_id
1983 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
1984 	{ .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
1985 	.sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
1986 	{ .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
1987 	.sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
1988 	{ .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
1989 	.sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
1990 	{ .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
1991 	.sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
1992 };
1993 
1994 enum razwi_event_sources {
1995 	RAZWI_TPC,
1996 	RAZWI_MME,
1997 	RAZWI_EDMA,
1998 	RAZWI_PDMA,
1999 	RAZWI_NIC,
2000 	RAZWI_DEC,
2001 	RAZWI_ROT
2002 };
2003 
2004 struct hbm_mc_error_causes {
2005 	u32 mask;
2006 	char cause[50];
2007 };
2008 
2009 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2010 
2011 /* Special blocks iterator is currently used to configure security protection bits,
2012  * and read global errors. Most HW blocks are addressable and those who aren't (N/A)-
2013  * must be skipped. Following configurations are commonly used for both PB config
2014  * and global error reading, since currently they both share the same settings.
2015  * Once it changes, we must remember to use separate configurations for either one.
2016  */
2017 static int gaudi2_iterator_skip_block_types[] = {
2018 		GAUDI2_BLOCK_TYPE_PLL,
2019 		GAUDI2_BLOCK_TYPE_EU_BIST,
2020 		GAUDI2_BLOCK_TYPE_HBM,
2021 		GAUDI2_BLOCK_TYPE_XFT
2022 };
2023 
2024 static struct range gaudi2_iterator_skip_block_ranges[] = {
2025 		/* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2026 		{mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2027 		{mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2028 		/* Skip all CPU blocks except for CPU_IF */
2029 		{mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2030 		{mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2031 };
2032 
2033 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2034 	{HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2035 	{HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2036 	{HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2037 	{HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2038 	{HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2039 };
2040 
2041 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2042 	[HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2043 	[HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2044 	[HBM_SEI_READ_ERR] = "SEI read data error",
2045 	[HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2046 	[HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2047 	[HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2048 	[HBM_SEI_DFI] = "SEI DFI error",
2049 	[HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2050 	[HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2051 };
2052 
2053 struct mmu_spi_sei_cause {
2054 	char cause[50];
2055 	int clear_bit;
2056 };
2057 
2058 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2059 	{"page fault", 1},		/* INTERRUPT_CLR[1] */
2060 	{"page access", 1},		/* INTERRUPT_CLR[1] */
2061 	{"bypass ddr", 2},		/* INTERRUPT_CLR[2] */
2062 	{"multi hit", 2},		/* INTERRUPT_CLR[2] */
2063 	{"mmu rei0", -1},		/* no clear register bit */
2064 	{"mmu rei1", -1},		/* no clear register bit */
2065 	{"stlb rei0", -1},		/* no clear register bit */
2066 	{"stlb rei1", -1},		/* no clear register bit */
2067 	{"rr privileged write hit", 2},	/* INTERRUPT_CLR[2] */
2068 	{"rr privileged read hit", 2},	/* INTERRUPT_CLR[2] */
2069 	{"rr secure write hit", 2},	/* INTERRUPT_CLR[2] */
2070 	{"rr secure read hit", 2},	/* INTERRUPT_CLR[2] */
2071 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2072 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2073 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2074 	{"bist_fail no use", 2},	/* INTERRUPT_CLR[2] */
2075 	{"slave error", 16},		/* INTERRUPT_CLR[16] */
2076 	{"dec error", 17},		/* INTERRUPT_CLR[17] */
2077 	{"burst fifo full", 2}		/* INTERRUPT_CLR[2] */
2078 };
2079 
2080 struct gaudi2_cache_invld_params {
2081 	u64 start_va;
2082 	u64 end_va;
2083 	u32 inv_start_val;
2084 	u32 flags;
2085 	bool range_invalidation;
2086 };
2087 
2088 struct gaudi2_tpc_idle_data {
2089 	struct engines_data *e;
2090 	unsigned long *mask;
2091 	bool *is_idle;
2092 	const char *tpc_fmt;
2093 };
2094 
2095 struct gaudi2_tpc_mmu_data {
2096 	u32 rw_asid;
2097 };
2098 
2099 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2100 
2101 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2102 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2103 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2104 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2105 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2106 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2107 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2108 										bool is_memset);
2109 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2110 		struct engines_data *e);
2111 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2112 		struct engines_data *e);
2113 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2114 		struct engines_data *e);
2115 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2116 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2117 
2118 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2119 {
2120 
2121 }
2122 
2123 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2124 {
2125 	return sizeof(struct packet_msg_short);
2126 }
2127 
2128 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2129 {
2130 	return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2131 }
2132 
2133 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2134 {
2135 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2136 	int dcore, inst, tpc_seq;
2137 	u32 offset;
2138 
2139 	/* init the return code */
2140 	ctx->rc = 0;
2141 
2142 	for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2143 		for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2144 			tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2145 
2146 			if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2147 				continue;
2148 
2149 			offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2150 
2151 			ctx->fn(hdev, dcore, inst, offset, ctx);
2152 			if (ctx->rc) {
2153 				dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2154 							dcore, inst);
2155 				return;
2156 			}
2157 		}
2158 	}
2159 
2160 	if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2161 		return;
2162 
2163 	/* special check for PCI TPC (DCORE0_TPC6) */
2164 	offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2165 	ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2166 	if (ctx->rc)
2167 		dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2168 }
2169 
2170 static bool gaudi2_host_phys_addr_valid(u64 addr)
2171 {
2172 	if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2173 		return true;
2174 
2175 	return false;
2176 }
2177 
2178 static int set_number_of_functional_hbms(struct hl_device *hdev)
2179 {
2180 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2181 	u8 faulty_hbms = hweight64(hdev->dram_binning);
2182 
2183 	/* check if all HBMs should be used */
2184 	if (!faulty_hbms) {
2185 		dev_dbg(hdev->dev, "All HBM are in use (no binning)\n");
2186 		prop->num_functional_hbms = GAUDI2_HBM_NUM;
2187 		return 0;
2188 	}
2189 
2190 	/*
2191 	 * check for error condition in which number of binning
2192 	 * candidates is higher than the maximum supported by the
2193 	 * driver (in which case binning mask shall be ignored and driver will
2194 	 * set the default)
2195 	 */
2196 	if (faulty_hbms > MAX_FAULTY_HBMS) {
2197 		dev_err(hdev->dev,
2198 			"HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2199 			MAX_FAULTY_HBMS, hdev->dram_binning);
2200 		return -EINVAL;
2201 	}
2202 
2203 	/*
2204 	 * by default, number of functional HBMs in Gaudi2 is always
2205 	 * GAUDI2_HBM_NUM - 1.
2206 	 */
2207 	prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2208 	return 0;
2209 }
2210 
2211 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2212 {
2213 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2214 	u32 basic_hbm_page_size;
2215 	int rc;
2216 
2217 	rc = set_number_of_functional_hbms(hdev);
2218 	if (rc)
2219 		return -EINVAL;
2220 
2221 	/*
2222 	 * Due to HW bug in which TLB size is x16 smaller than expected we use a workaround
2223 	 * in which we are using x16 bigger page size to be able to populate the entire
2224 	 * HBM mappings in the TLB
2225 	 */
2226 	basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2227 	prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2228 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2229 	prop->dram_size = prop->num_functional_hbms * SZ_16G;
2230 	prop->dram_base_address = DRAM_PHYS_BASE;
2231 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2232 	prop->dram_supports_virtual_memory = true;
2233 
2234 	prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2235 	prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2236 	prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2237 	prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2238 
2239 	/* since DRAM page size differs from DMMU page size we need to allocate
2240 	 * DRAM memory in units of dram_page size and mapping this memory in
2241 	 * units of DMMU page size. we overcome this size mismatch using a
2242 	 * scrambling routine which takes a DRAM page and converts it to a DMMU
2243 	 * page.
2244 	 * We therefore:
2245 	 * 1. partition the virtual address space to DRAM-page (whole) pages.
2246 	 *    (suppose we get n such pages)
2247 	 * 2. limit the amount of virtual address space we got from 1 above to
2248 	 *    a multiple of 64M as we don't want the scrambled address to cross
2249 	 *    the DRAM virtual address space.
2250 	 *    ( m = (n * DRAM_page_size) / DMMU_page_size).
2251 	 * 3. determine the and address accordingly
2252 	 *    end_addr = start_addr + m * 48M
2253 	 *
2254 	 *    the DRAM address MSBs (63:48) are not part of the roundup calculation
2255 	 */
2256 	prop->dmmu.start_addr = prop->dram_base_address +
2257 			(prop->dram_page_size *
2258 				DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2259 
2260 	prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2261 			div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2262 
2263 	return 0;
2264 }
2265 
2266 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2267 {
2268 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2269 	struct hw_queue_properties *q_props;
2270 	u32 num_sync_stream_queues = 0;
2271 	int i;
2272 
2273 	prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2274 	prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2275 					GFP_KERNEL);
2276 
2277 	if (!prop->hw_queues_props)
2278 		return -ENOMEM;
2279 
2280 	q_props = prop->hw_queues_props;
2281 
2282 	for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2283 		q_props[i].type = QUEUE_TYPE_HW;
2284 		q_props[i].driver_only = 0;
2285 
2286 		if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2287 			q_props[i].supports_sync_stream = 0;
2288 		} else {
2289 			q_props[i].supports_sync_stream = 1;
2290 			num_sync_stream_queues++;
2291 		}
2292 
2293 		q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2294 	}
2295 
2296 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2297 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2298 	q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2299 
2300 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2301 	prop->cfg_base_address = CFG_BASE;
2302 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2303 	prop->host_base_address = HOST_PHYS_BASE_0;
2304 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2305 	prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2306 	prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2307 	prop->user_dec_intr_count = NUMBER_OF_DEC;
2308 	prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2309 	prop->completion_mode = HL_COMPLETION_MODE_CS;
2310 	prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2311 	prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2312 
2313 	prop->sram_base_address = SRAM_BASE_ADDR;
2314 	prop->sram_size = SRAM_SIZE;
2315 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2316 	prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2317 
2318 	prop->hints_range_reservation = true;
2319 
2320 	prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2321 
2322 	if (hdev->pldm)
2323 		prop->mmu_pgt_size = 0x800000; /* 8MB */
2324 	else
2325 		prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
2326 
2327 	prop->mmu_pte_size = HL_PTE_SIZE;
2328 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
2329 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
2330 
2331 	prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2332 	prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2333 	prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2334 	prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2335 	prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
2336 	prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2337 	prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2338 	prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2339 	prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2340 	prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
2341 	prop->dmmu.page_size = PAGE_SIZE_1GB;
2342 	prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
2343 	prop->dmmu.last_mask = LAST_MASK;
2344 	prop->dmmu.host_resident = 1;
2345 	prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
2346 	prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2347 
2348 	/*
2349 	 * this is done in order to be able to validate FW descriptor (i.e. validating that
2350 	 * the addresses and allocated space for FW image does not cross memory bounds).
2351 	 * for this reason we set the DRAM size to the minimum possible and later it will
2352 	 * be modified according to what reported in the cpucp info packet
2353 	 */
2354 	prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
2355 
2356 	hdev->pmmu_huge_range = true;
2357 	prop->pmmu.host_resident = 1;
2358 	prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2359 	prop->pmmu.last_mask = LAST_MASK;
2360 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
2361 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
2362 
2363 	prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2364 	prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2365 	prop->hints_host_hpage_reserved_va_range.start_addr =
2366 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2367 	prop->hints_host_hpage_reserved_va_range.end_addr =
2368 			RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2369 
2370 	if (PAGE_SIZE == SZ_64K) {
2371 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2372 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2373 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2374 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2375 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2376 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2377 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2378 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2379 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2380 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2381 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2382 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2383 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2384 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2385 		prop->pmmu.page_size = PAGE_SIZE_64KB;
2386 
2387 		/* shifts and masks are the same in PMMU and HPMMU */
2388 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2389 		prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2390 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2391 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2392 	} else {
2393 		prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2394 		prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2395 		prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2396 		prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2397 		prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2398 		prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2399 		prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2400 		prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2401 		prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2402 		prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2403 		prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2404 		prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2405 		prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2406 		prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2407 		prop->pmmu.page_size = PAGE_SIZE_4KB;
2408 
2409 		/* shifts and masks are the same in PMMU and HPMMU */
2410 		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2411 		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2412 		prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2413 		prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2414 	}
2415 
2416 	prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2417 	prop->num_engine_cores = CPU_ID_MAX;
2418 	prop->cfg_size = CFG_SIZE;
2419 	prop->max_asid = MAX_ASID;
2420 	prop->num_of_events = GAUDI2_EVENT_SIZE;
2421 
2422 	prop->supports_engine_modes = true;
2423 
2424 	prop->dc_power_default = DC_POWER_DEFAULT;
2425 
2426 	prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2427 	prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2428 	prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2429 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2430 
2431 	strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2432 
2433 	prop->mme_master_slave_mode = 1;
2434 
2435 	prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2436 					(num_sync_stream_queues * HL_RSVD_SOBS);
2437 
2438 	prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2439 					(num_sync_stream_queues * HL_RSVD_MONS);
2440 
2441 	prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2442 	prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2443 	prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2444 
2445 	prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2446 
2447 	prop->fw_cpu_boot_dev_sts0_valid = false;
2448 	prop->fw_cpu_boot_dev_sts1_valid = false;
2449 	prop->hard_reset_done_by_fw = false;
2450 	prop->gic_interrupts_enable = true;
2451 
2452 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2453 
2454 	prop->max_dec = NUMBER_OF_DEC;
2455 
2456 	prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2457 
2458 	prop->dma_mask = 64;
2459 
2460 	prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2461 
2462 	return 0;
2463 }
2464 
2465 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2466 {
2467 	static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2468 	bool is_wc[3] = {false, false, true};
2469 	int rc;
2470 
2471 	rc = hl_pci_bars_map(hdev, name, is_wc);
2472 	if (rc)
2473 		return rc;
2474 
2475 	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2476 
2477 	return 0;
2478 }
2479 
2480 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2481 {
2482 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2483 	struct hl_inbound_pci_region pci_region;
2484 	u64 old_addr = addr;
2485 	int rc;
2486 
2487 	if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2488 		return old_addr;
2489 
2490 	if (hdev->asic_prop.iatu_done_by_fw)
2491 		return U64_MAX;
2492 
2493 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2494 	pci_region.mode = PCI_BAR_MATCH_MODE;
2495 	pci_region.bar = DRAM_BAR_ID;
2496 	pci_region.addr = addr;
2497 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2498 	if (rc)
2499 		return U64_MAX;
2500 
2501 	if (gaudi2) {
2502 		old_addr = gaudi2->dram_bar_cur_addr;
2503 		gaudi2->dram_bar_cur_addr = addr;
2504 	}
2505 
2506 	return old_addr;
2507 }
2508 
2509 static int gaudi2_init_iatu(struct hl_device *hdev)
2510 {
2511 	struct hl_inbound_pci_region inbound_region;
2512 	struct hl_outbound_pci_region outbound_region;
2513 	u32 bar_addr_low, bar_addr_high;
2514 	int rc;
2515 
2516 	if (hdev->asic_prop.iatu_done_by_fw)
2517 		return 0;
2518 
2519 	/* Temporary inbound Region 0 - Bar 0 - Point to CFG
2520 	 * We must map this region in BAR match mode in order to
2521 	 * fetch BAR physical base address
2522 	 */
2523 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2524 	inbound_region.bar = SRAM_CFG_BAR_ID;
2525 	/* Base address must be aligned to Bar size which is 256 MB */
2526 	inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2527 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2528 	if (rc)
2529 		return rc;
2530 
2531 	/* Fetch physical BAR address */
2532 	bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2533 	bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2534 
2535 	hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2536 
2537 	/* Inbound Region 0 - Bar 0 - Point to CFG */
2538 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2539 	inbound_region.bar = SRAM_CFG_BAR_ID;
2540 	inbound_region.offset_in_bar = 0;
2541 	inbound_region.addr = STM_FLASH_BASE_ADDR;
2542 	inbound_region.size = CFG_REGION_SIZE;
2543 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2544 	if (rc)
2545 		return rc;
2546 
2547 	/* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2548 	inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2549 	inbound_region.bar = SRAM_CFG_BAR_ID;
2550 	inbound_region.offset_in_bar = CFG_REGION_SIZE;
2551 	inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2552 	inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2553 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2554 	if (rc)
2555 		return rc;
2556 
2557 	/* Inbound Region 2 - Bar 4 - Point to DRAM */
2558 	inbound_region.mode = PCI_BAR_MATCH_MODE;
2559 	inbound_region.bar = DRAM_BAR_ID;
2560 	inbound_region.addr = DRAM_PHYS_BASE;
2561 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2562 	if (rc)
2563 		return rc;
2564 
2565 	/* Outbound Region 0 - Point to Host */
2566 	outbound_region.addr = HOST_PHYS_BASE_0;
2567 	outbound_region.size = HOST_PHYS_SIZE_0;
2568 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2569 
2570 	return rc;
2571 }
2572 
2573 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2574 {
2575 	return RREG32(mmHW_STATE);
2576 }
2577 
2578 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2579 {
2580 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2581 
2582 	/*
2583 	 * check for error condition in which number of binning candidates
2584 	 * is higher than the maximum supported by the driver
2585 	 */
2586 	if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2587 		dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2588 					MAX_CLUSTER_BINNING_FAULTY_TPCS,
2589 					hdev->tpc_binning);
2590 		return -EINVAL;
2591 	}
2592 
2593 	prop->tpc_binning_mask = hdev->tpc_binning;
2594 	prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2595 
2596 	return 0;
2597 }
2598 
2599 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2600 {
2601 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2602 	struct hw_queue_properties *q_props = prop->hw_queues_props;
2603 	u64 tpc_binning_mask;
2604 	u8 subst_idx = 0;
2605 	int i, rc;
2606 
2607 	rc = gaudi2_tpc_binning_init_prop(hdev);
2608 	if (rc)
2609 		return rc;
2610 
2611 	tpc_binning_mask = prop->tpc_binning_mask;
2612 
2613 	for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2614 		u8 subst_seq, binned, qid_base;
2615 
2616 		if (tpc_binning_mask == 0)
2617 			break;
2618 
2619 		if (subst_idx == 0) {
2620 			subst_seq = TPC_ID_DCORE0_TPC6;
2621 			qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2622 		} else {
2623 			subst_seq = TPC_ID_DCORE3_TPC5;
2624 			qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2625 		}
2626 
2627 
2628 		/* clear bit from mask */
2629 		binned = __ffs(tpc_binning_mask);
2630 		/*
2631 		 * Coverity complains about possible out-of-bound access in
2632 		 * clear_bit
2633 		 */
2634 		if (binned >= TPC_ID_SIZE) {
2635 			dev_err(hdev->dev,
2636 				"Invalid binned TPC (binning mask: %llx)\n",
2637 				tpc_binning_mask);
2638 			return -EINVAL;
2639 		}
2640 		clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2641 
2642 		/* also clear replacing TPC bit from enabled mask */
2643 		clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2644 
2645 		/* bin substite TPC's Qs */
2646 		q_props[qid_base].binned = 1;
2647 		q_props[qid_base + 1].binned = 1;
2648 		q_props[qid_base + 2].binned = 1;
2649 		q_props[qid_base + 3].binned = 1;
2650 
2651 		subst_idx++;
2652 	}
2653 
2654 	return 0;
2655 }
2656 
2657 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2658 {
2659 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2660 	u8 num_faulty;
2661 
2662 	num_faulty = hweight32(hdev->decoder_binning);
2663 
2664 	/*
2665 	 * check for error condition in which number of binning candidates
2666 	 * is higher than the maximum supported by the driver
2667 	 */
2668 	if (num_faulty > MAX_FAULTY_DECODERS) {
2669 		dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2670 						hdev->decoder_binning);
2671 		return -EINVAL;
2672 	}
2673 
2674 	prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2675 
2676 	if (prop->decoder_binning_mask)
2677 		prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2678 	else
2679 		prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2680 
2681 	return 0;
2682 }
2683 
2684 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2685 {
2686 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2687 
2688 	/* check if we should override default binning */
2689 	if (!hdev->dram_binning) {
2690 		prop->dram_binning_mask = 0;
2691 		prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2692 		return;
2693 	}
2694 
2695 	/* set DRAM binning constraints */
2696 	prop->faulty_dram_cluster_map |= hdev->dram_binning;
2697 	prop->dram_binning_mask = hdev->dram_binning;
2698 	prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2699 }
2700 
2701 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2702 {
2703 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2704 	struct hw_queue_properties *q_props;
2705 	u8 seq, num_faulty;
2706 
2707 	num_faulty = hweight32(hdev->edma_binning);
2708 
2709 	/*
2710 	 * check for error condition in which number of binning candidates
2711 	 * is higher than the maximum supported by the driver
2712 	 */
2713 	if (num_faulty > MAX_FAULTY_EDMAS) {
2714 		dev_err(hdev->dev,
2715 			"EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2716 			hdev->edma_binning);
2717 		return -EINVAL;
2718 	}
2719 
2720 	if (!hdev->edma_binning) {
2721 		prop->edma_binning_mask = 0;
2722 		prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2723 		return 0;
2724 	}
2725 
2726 	seq = __ffs((unsigned long)hdev->edma_binning);
2727 
2728 	/* set binning constraints */
2729 	prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2730 	prop->edma_binning_mask = hdev->edma_binning;
2731 	prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2732 
2733 	/* bin substitute EDMA's queue */
2734 	q_props = prop->hw_queues_props;
2735 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2736 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2737 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2738 	q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2739 
2740 	return 0;
2741 }
2742 
2743 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2744 {
2745 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2746 	u8 num_faulty, seq;
2747 
2748 	/* check if we should override default binning */
2749 	if (!xbar_edge_iso_mask) {
2750 		prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2751 		return 0;
2752 	}
2753 
2754 	/*
2755 	 * note that it can be set to value other than 0 only after cpucp packet (i.e.
2756 	 * only the FW can set a redundancy value). for user it'll always be 0.
2757 	 */
2758 	num_faulty = hweight32(xbar_edge_iso_mask);
2759 
2760 	/*
2761 	 * check for error condition in which number of binning candidates
2762 	 * is higher than the maximum supported by the driver
2763 	 */
2764 	if (num_faulty > MAX_FAULTY_XBARS) {
2765 		dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2766 									MAX_FAULTY_XBARS);
2767 		return -EINVAL;
2768 	}
2769 
2770 	seq = __ffs((unsigned long)xbar_edge_iso_mask);
2771 
2772 	/* set binning constraints */
2773 	prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2774 	prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2775 
2776 	return 0;
2777 }
2778 
2779 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2780 {
2781 	int rc;
2782 
2783 	/*
2784 	 * mark all clusters as good, each component will "fail" cluster
2785 	 * based on eFuse/user values.
2786 	 * If more than single cluster is faulty- the chip is unusable
2787 	 */
2788 	hdev->asic_prop.faulty_dram_cluster_map = 0;
2789 
2790 	gaudi2_set_dram_binning_masks(hdev);
2791 
2792 	rc = gaudi2_set_edma_binning_masks(hdev);
2793 	if (rc)
2794 		return rc;
2795 
2796 	rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2797 	if (rc)
2798 		return rc;
2799 
2800 
2801 	/* always initially set to full mask */
2802 	hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2803 
2804 	return 0;
2805 }
2806 
2807 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2808 {
2809 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2810 	int rc;
2811 
2812 	rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2813 	if (rc)
2814 		return rc;
2815 
2816 	/* if we have DRAM binning reported by FW we should perform cluster config  */
2817 	if (prop->faulty_dram_cluster_map) {
2818 		u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2819 
2820 		prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2821 	}
2822 
2823 	return 0;
2824 }
2825 
2826 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2827 {
2828 	int rc;
2829 
2830 	rc = gaudi2_set_cluster_binning_masks(hdev);
2831 	if (rc)
2832 		return rc;
2833 
2834 	rc = gaudi2_set_tpc_binning_masks(hdev);
2835 	if (rc)
2836 		return rc;
2837 
2838 	rc = gaudi2_set_dec_binning_masks(hdev);
2839 	if (rc)
2840 		return rc;
2841 
2842 	return 0;
2843 }
2844 
2845 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2846 {
2847 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2848 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2849 	long max_power;
2850 	u64 dram_size;
2851 	int rc;
2852 
2853 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2854 		return 0;
2855 
2856 	/* No point of asking this information again when not doing hard reset, as the device
2857 	 * CPU hasn't been reset
2858 	 */
2859 	if (hdev->reset_info.in_compute_reset)
2860 		return 0;
2861 
2862 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
2863 										mmCPU_BOOT_ERR1);
2864 	if (rc)
2865 		return rc;
2866 
2867 	dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
2868 	if (dram_size) {
2869 		/* we can have wither 5 or 6 HBMs. other values are invalid */
2870 
2871 		if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
2872 					(dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
2873 			dev_err(hdev->dev,
2874 				"F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
2875 				dram_size, prop->dram_size);
2876 			dram_size = prop->dram_size;
2877 		}
2878 
2879 		prop->dram_size = dram_size;
2880 		prop->dram_end_address = prop->dram_base_address + dram_size;
2881 	}
2882 
2883 	if (!strlen(prop->cpucp_info.card_name))
2884 		strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2885 
2886 	/* Overwrite binning masks with the actual binning values from F/W */
2887 	hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
2888 	hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
2889 	hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
2890 	hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
2891 
2892 	dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
2893 			hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
2894 			hdev->decoder_binning);
2895 
2896 	/*
2897 	 * at this point the DRAM parameters need to be updated according to data obtained
2898 	 * from the FW
2899 	 */
2900 	rc = hdev->asic_funcs->set_dram_properties(hdev);
2901 	if (rc)
2902 		return rc;
2903 
2904 	rc = hdev->asic_funcs->set_binning_masks(hdev);
2905 	if (rc)
2906 		return rc;
2907 
2908 	max_power = hl_fw_get_max_power(hdev);
2909 	if (max_power < 0)
2910 		return max_power;
2911 
2912 	prop->max_power_default = (u64) max_power;
2913 
2914 	return 0;
2915 }
2916 
2917 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
2918 {
2919 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
2920 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
2921 	int rc;
2922 
2923 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
2924 		return 0;
2925 
2926 	rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
2927 	if (rc)
2928 		return rc;
2929 
2930 	hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
2931 
2932 	return 0;
2933 }
2934 
2935 static int gaudi2_early_init(struct hl_device *hdev)
2936 {
2937 	struct asic_fixed_properties *prop = &hdev->asic_prop;
2938 	struct pci_dev *pdev = hdev->pdev;
2939 	resource_size_t pci_bar_size;
2940 	int rc;
2941 
2942 	rc = gaudi2_set_fixed_properties(hdev);
2943 	if (rc)
2944 		return rc;
2945 
2946 	/* Check BAR sizes */
2947 	pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
2948 
2949 	if (pci_bar_size != CFG_BAR_SIZE) {
2950 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2951 			SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
2952 		rc = -ENODEV;
2953 		goto free_queue_props;
2954 	}
2955 
2956 	pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
2957 	if (pci_bar_size != MSIX_BAR_SIZE) {
2958 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
2959 			MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
2960 		rc = -ENODEV;
2961 		goto free_queue_props;
2962 	}
2963 
2964 	prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
2965 	hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
2966 
2967 	/*
2968 	 * Only in pldm driver config iATU
2969 	 */
2970 	if (hdev->pldm)
2971 		hdev->asic_prop.iatu_done_by_fw = false;
2972 	else
2973 		hdev->asic_prop.iatu_done_by_fw = true;
2974 
2975 	rc = hl_pci_init(hdev);
2976 	if (rc)
2977 		goto free_queue_props;
2978 
2979 	/* Before continuing in the initialization, we need to read the preboot
2980 	 * version to determine whether we run with a security-enabled firmware
2981 	 */
2982 	rc = hl_fw_read_preboot_status(hdev);
2983 	if (rc) {
2984 		if (hdev->reset_on_preboot_fail)
2985 			/* we are already on failure flow, so don't check if hw_fini fails. */
2986 			hdev->asic_funcs->hw_fini(hdev, true, false);
2987 		goto pci_fini;
2988 	}
2989 
2990 	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
2991 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
2992 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2993 		if (rc) {
2994 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
2995 			goto pci_fini;
2996 		}
2997 	}
2998 
2999 	return 0;
3000 
3001 pci_fini:
3002 	hl_pci_fini(hdev);
3003 free_queue_props:
3004 	kfree(hdev->asic_prop.hw_queues_props);
3005 	return rc;
3006 }
3007 
3008 static int gaudi2_early_fini(struct hl_device *hdev)
3009 {
3010 	kfree(hdev->asic_prop.hw_queues_props);
3011 	hl_pci_fini(hdev);
3012 
3013 	return 0;
3014 }
3015 
3016 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3017 {
3018 	switch (arc_id) {
3019 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3020 		return true;
3021 	default:
3022 		return false;
3023 	}
3024 }
3025 
3026 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3027 {
3028 	switch (arc_id) {
3029 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3030 		return true;
3031 	default:
3032 		return false;
3033 	}
3034 }
3035 
3036 static void gaudi2_init_arcs(struct hl_device *hdev)
3037 {
3038 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3039 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3040 	u64 arc_id;
3041 	u32 i;
3042 
3043 	for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3044 		if (gaudi2_is_arc_enabled(hdev, i))
3045 			continue;
3046 
3047 		gaudi2_set_arc_id_cap(hdev, i);
3048 	}
3049 
3050 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3051 		if (!gaudi2_is_queue_enabled(hdev, i))
3052 			continue;
3053 
3054 		arc_id = gaudi2_queue_id_to_arc_id[i];
3055 		if (gaudi2_is_arc_enabled(hdev, arc_id))
3056 			continue;
3057 
3058 		if (gaudi2_is_arc_nic_owned(arc_id) &&
3059 				!(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3060 			continue;
3061 
3062 		if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3063 							BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3064 			continue;
3065 
3066 		gaudi2_set_arc_id_cap(hdev, arc_id);
3067 	}
3068 
3069 	/* Fetch ARC scratchpad address */
3070 	hdev->asic_prop.engine_core_interrupt_reg_addr =
3071 		CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3072 }
3073 
3074 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3075 {
3076 	u32 reg_base, reg_val;
3077 	int rc;
3078 
3079 	switch (cpu_id) {
3080 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3081 		/* Each ARC scheduler has 2 consecutive DCCM blocks */
3082 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3083 						ARC_DCCM_BLOCK_SIZE * 2, true);
3084 		if (rc)
3085 			return rc;
3086 		break;
3087 	case CPU_ID_SCHED_ARC4:
3088 	case CPU_ID_SCHED_ARC5:
3089 	case CPU_ID_MME_QMAN_ARC0:
3090 	case CPU_ID_MME_QMAN_ARC1:
3091 		reg_base = gaudi2_arc_blocks_bases[cpu_id];
3092 
3093 		/* Scrub lower DCCM block */
3094 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3095 						ARC_DCCM_BLOCK_SIZE, true);
3096 		if (rc)
3097 			return rc;
3098 
3099 		/* Switch to upper DCCM block */
3100 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3101 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3102 
3103 		/* Scrub upper DCCM block */
3104 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3105 						ARC_DCCM_BLOCK_SIZE, true);
3106 		if (rc)
3107 			return rc;
3108 
3109 		/* Switch to lower DCCM block */
3110 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3111 		WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3112 		break;
3113 	default:
3114 		rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3115 						ARC_DCCM_BLOCK_SIZE, true);
3116 		if (rc)
3117 			return rc;
3118 	}
3119 
3120 	return 0;
3121 }
3122 
3123 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3124 {
3125 	u16 arc_id;
3126 	int rc;
3127 
3128 	for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3129 		if (!gaudi2_is_arc_enabled(hdev, arc_id))
3130 			continue;
3131 
3132 		rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3133 		if (rc)
3134 			return rc;
3135 	}
3136 
3137 	return 0;
3138 }
3139 
3140 static int gaudi2_late_init(struct hl_device *hdev)
3141 {
3142 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3143 	int rc;
3144 
3145 	hdev->asic_prop.supports_advanced_cpucp_rc = true;
3146 
3147 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3148 					gaudi2->virt_msix_db_dma_addr);
3149 	if (rc) {
3150 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
3151 		return rc;
3152 	}
3153 
3154 	rc = gaudi2_fetch_psoc_frequency(hdev);
3155 	if (rc) {
3156 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3157 		goto disable_pci_access;
3158 	}
3159 
3160 	gaudi2_init_arcs(hdev);
3161 
3162 	rc = gaudi2_scrub_arcs_dccm(hdev);
3163 	if (rc) {
3164 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3165 		goto disable_pci_access;
3166 	}
3167 
3168 	gaudi2_init_security(hdev);
3169 
3170 	return 0;
3171 
3172 disable_pci_access:
3173 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3174 
3175 	return rc;
3176 }
3177 
3178 static void gaudi2_late_fini(struct hl_device *hdev)
3179 {
3180 	hl_hwmon_release_resources(hdev);
3181 }
3182 
3183 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3184 {
3185 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3186 
3187 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3188 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3189 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3190 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3191 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3192 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3193 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3194 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3195 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3196 	HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3197 }
3198 
3199 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3200 {
3201 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3202 	struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3203 	u32 block_size, umr_start_idx, num_umr_blocks;
3204 	int i;
3205 
3206 	for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3207 		if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3208 			block_size = ARC_DCCM_BLOCK_SIZE * 2;
3209 		else
3210 			block_size = ARC_DCCM_BLOCK_SIZE;
3211 
3212 		blocks[i].address = gaudi2_arc_dccm_bases[i];
3213 		blocks[i].size = block_size;
3214 	}
3215 
3216 	blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3217 	blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3218 
3219 	blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3220 	blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3221 
3222 	blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3223 	blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3224 
3225 	blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3226 	blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3227 
3228 	blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3229 	blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3230 
3231 	blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3232 	blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3233 
3234 	blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3235 	blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3236 
3237 	blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3238 	blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3239 
3240 	umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3241 	num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3242 	for (i = 0 ; i < num_umr_blocks ; i++) {
3243 		u8 nic_id, umr_block_id;
3244 
3245 		nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3246 		umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3247 
3248 		blocks[umr_start_idx + i].address =
3249 			mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3250 			(nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3251 			(nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3252 			umr_block_id * NIC_UMR_OFFSET;
3253 		blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3254 	}
3255 
3256 	/* Expose decoder HW configuration block to user */
3257 	gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3258 
3259 	for (i = 1; i < NUM_OF_DCORES; ++i) {
3260 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3261 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3262 
3263 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3264 						mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3265 
3266 		blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3267 						mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3268 	}
3269 }
3270 
3271 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3272 {
3273 	dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3274 	void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3275 	int i, j, rc = 0;
3276 
3277 	/* The device ARC works with 32-bits addresses, and because there is a single HW register
3278 	 * that holds the extension bits (49..28), these bits must be identical in all the allocated
3279 	 * range.
3280 	 */
3281 
3282 	for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3283 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3284 							&dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3285 		if (!virt_addr_arr[i]) {
3286 			rc = -ENOMEM;
3287 			goto free_dma_mem_arr;
3288 		}
3289 
3290 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3291 		if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3292 			break;
3293 	}
3294 
3295 	if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3296 		dev_err(hdev->dev,
3297 			"MSB of ARC accessible DMA memory are not identical in all range\n");
3298 		rc = -EFAULT;
3299 		goto free_dma_mem_arr;
3300 	}
3301 
3302 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3303 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3304 
3305 free_dma_mem_arr:
3306 	for (j = 0 ; j < i ; j++)
3307 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3308 						dma_addr_arr[j]);
3309 
3310 	return rc;
3311 }
3312 
3313 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3314 {
3315 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3316 	struct pci_mem_region *region;
3317 
3318 	/* CFG */
3319 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
3320 	region->region_base = CFG_BASE;
3321 	region->region_size = CFG_SIZE;
3322 	region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3323 	region->bar_size = CFG_BAR_SIZE;
3324 	region->bar_id = SRAM_CFG_BAR_ID;
3325 	region->used = 1;
3326 
3327 	/* SRAM */
3328 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3329 	region->region_base = SRAM_BASE_ADDR;
3330 	region->region_size = SRAM_SIZE;
3331 	region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3332 	region->bar_size = CFG_BAR_SIZE;
3333 	region->bar_id = SRAM_CFG_BAR_ID;
3334 	region->used = 1;
3335 
3336 	/* DRAM */
3337 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3338 	region->region_base = DRAM_PHYS_BASE;
3339 	region->region_size = hdev->asic_prop.dram_size;
3340 	region->offset_in_bar = 0;
3341 	region->bar_size = prop->dram_pci_bar_size;
3342 	region->bar_id = DRAM_BAR_ID;
3343 	region->used = 1;
3344 }
3345 
3346 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3347 {
3348 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3349 	int i, j, k;
3350 
3351 	/* Initialize TPC interrupt */
3352 	HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3353 
3354 	/* Initialize unexpected error interrupt */
3355 	HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3356 						HL_USR_INTERRUPT_UNEXPECTED);
3357 
3358 	/* Initialize common user CQ interrupt */
3359 	HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3360 				HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3361 
3362 	/* Initialize common decoder interrupt */
3363 	HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3364 				HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3365 
3366 	/* User interrupts structure holds both decoder and user interrupts from various engines.
3367 	 * We first initialize the decoder interrupts and then we add the user interrupts.
3368 	 * The only limitation is that the last decoder interrupt id must be smaller
3369 	 * then GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3370 	 */
3371 
3372 	/* Initialize decoder interrupts, expose only normal interrupts,
3373 	 * error interrupts to be handled by driver
3374 	 */
3375 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3376 										i += 2, j++)
3377 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3378 						HL_USR_INTERRUPT_DECODER);
3379 
3380 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3381 		HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
3382 }
3383 
3384 static inline int gaudi2_get_non_zero_random_int(void)
3385 {
3386 	int rand = get_random_u32();
3387 
3388 	return rand ? rand : 1;
3389 }
3390 
3391 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3392 {
3393 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3394 	struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3395 			&prop->skip_special_blocks_cfg;
3396 
3397 	kfree(prop->special_blocks);
3398 	kfree(skip_special_blocks_cfg->block_types);
3399 	kfree(skip_special_blocks_cfg->block_ranges);
3400 }
3401 
3402 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3403 {
3404 	gaudi2_special_blocks_free(hdev);
3405 }
3406 
3407 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3408 		struct hl_special_blocks_cfg *special_blocks_cfg,
3409 		u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3410 {
3411 	return false;
3412 }
3413 
3414 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3415 {
3416 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3417 	int i, rc;
3418 
3419 	/* Configure Special blocks */
3420 	prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3421 	prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3422 	prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3423 			sizeof(*prop->special_blocks), GFP_KERNEL);
3424 	if (!prop->special_blocks)
3425 		return -ENOMEM;
3426 
3427 	for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3428 		memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3429 				sizeof(*prop->special_blocks));
3430 
3431 	/* Configure when to skip Special blocks */
3432 	memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3433 	prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3434 
3435 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3436 		prop->skip_special_blocks_cfg.block_types =
3437 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3438 					sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3439 		if (!prop->skip_special_blocks_cfg.block_types) {
3440 			rc = -ENOMEM;
3441 			goto free_special_blocks;
3442 		}
3443 
3444 		memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3445 				sizeof(gaudi2_iterator_skip_block_types));
3446 
3447 		prop->skip_special_blocks_cfg.block_types_len =
3448 					ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3449 	}
3450 
3451 	if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3452 		prop->skip_special_blocks_cfg.block_ranges =
3453 				kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3454 					sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3455 		if (!prop->skip_special_blocks_cfg.block_ranges) {
3456 			rc = -ENOMEM;
3457 			goto free_skip_special_blocks_types;
3458 		}
3459 
3460 		for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3461 			memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3462 					&gaudi2_iterator_skip_block_ranges[i],
3463 					sizeof(struct range));
3464 
3465 		prop->skip_special_blocks_cfg.block_ranges_len =
3466 					ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3467 	}
3468 
3469 	return 0;
3470 
3471 free_skip_special_blocks_types:
3472 	kfree(prop->skip_special_blocks_cfg.block_types);
3473 free_special_blocks:
3474 	kfree(prop->special_blocks);
3475 
3476 	return rc;
3477 }
3478 
3479 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3480 {
3481 	return gaudi2_special_blocks_config(hdev);
3482 }
3483 
3484 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3485 {
3486 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3487 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3488 	int i;
3489 
3490 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3491 		/* bail-out if this is an allocation failure point */
3492 		if (!msg_info[i].kern_addr)
3493 			break;
3494 
3495 		hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3496 		msg_info[i].kern_addr = NULL;
3497 	}
3498 }
3499 
3500 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3501 {
3502 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3503 	struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3504 	int i, rc;
3505 
3506 	/* allocate a message-short buf for each Q we intend to test */
3507 	for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3508 		msg_info[i].kern_addr =
3509 			(void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3510 							GFP_KERNEL, &msg_info[i].dma_addr);
3511 		if (!msg_info[i].kern_addr) {
3512 			dev_err(hdev->dev,
3513 				"Failed to allocate dma memory for H/W queue %d testing\n", i);
3514 			rc = -ENOMEM;
3515 			goto err_exit;
3516 		}
3517 	}
3518 
3519 	return 0;
3520 
3521 err_exit:
3522 	gaudi2_test_queues_msgs_free(hdev);
3523 	return rc;
3524 }
3525 
3526 static int gaudi2_sw_init(struct hl_device *hdev)
3527 {
3528 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3529 	struct gaudi2_device *gaudi2;
3530 	int i, rc;
3531 
3532 	/* Allocate device structure */
3533 	gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3534 	if (!gaudi2)
3535 		return -ENOMEM;
3536 
3537 	for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3538 		if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3539 			continue;
3540 
3541 		if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3542 			dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3543 				GAUDI2_EVENT_SIZE);
3544 			rc = -EINVAL;
3545 			goto free_gaudi2_device;
3546 		}
3547 
3548 		gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3549 	}
3550 
3551 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3552 		gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3553 
3554 	gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3555 
3556 	hdev->asic_specific = gaudi2;
3557 
3558 	/* Create DMA pool for small allocations.
3559 	 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3560 	 * PI/CI registers allocated from this pool have this restriction
3561 	 */
3562 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3563 					GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3564 	if (!hdev->dma_pool) {
3565 		dev_err(hdev->dev, "failed to create DMA pool\n");
3566 		rc = -ENOMEM;
3567 		goto free_gaudi2_device;
3568 	}
3569 
3570 	rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3571 	if (rc)
3572 		goto free_dma_pool;
3573 
3574 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3575 	if (!hdev->cpu_accessible_dma_pool) {
3576 		dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3577 		rc = -ENOMEM;
3578 		goto free_cpu_dma_mem;
3579 	}
3580 
3581 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3582 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3583 	if (rc) {
3584 		dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3585 		rc = -EFAULT;
3586 		goto free_cpu_accessible_dma_pool;
3587 	}
3588 
3589 	gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3590 								&gaudi2->virt_msix_db_dma_addr);
3591 	if (!gaudi2->virt_msix_db_cpu_addr) {
3592 		dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3593 		rc = -ENOMEM;
3594 		goto free_cpu_accessible_dma_pool;
3595 	}
3596 
3597 	spin_lock_init(&gaudi2->hw_queues_lock);
3598 
3599 	gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
3600 							&gaudi2->scratchpad_bus_address,
3601 							GFP_KERNEL | __GFP_ZERO);
3602 	if (!gaudi2->scratchpad_kernel_address) {
3603 		rc = -ENOMEM;
3604 		goto free_virt_msix_db_mem;
3605 	}
3606 
3607 	gaudi2_user_mapped_blocks_init(hdev);
3608 
3609 	/* Initialize user interrupts */
3610 	gaudi2_user_interrupt_setup(hdev);
3611 
3612 	hdev->supports_coresight = true;
3613 	hdev->supports_sync_stream = true;
3614 	hdev->supports_cb_mapping = true;
3615 	hdev->supports_wait_for_multi_cs = false;
3616 
3617 	prop->supports_compute_reset = true;
3618 
3619 	hdev->asic_funcs->set_pci_memory_regions(hdev);
3620 
3621 	rc = gaudi2_special_blocks_iterator_config(hdev);
3622 	if (rc)
3623 		goto free_scratchpad_mem;
3624 
3625 	rc = gaudi2_test_queues_msgs_alloc(hdev);
3626 	if (rc)
3627 		goto special_blocks_free;
3628 
3629 	return 0;
3630 
3631 special_blocks_free:
3632 	gaudi2_special_blocks_iterator_free(hdev);
3633 free_scratchpad_mem:
3634 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3635 				  gaudi2->scratchpad_bus_address);
3636 free_virt_msix_db_mem:
3637 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3638 free_cpu_accessible_dma_pool:
3639 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3640 free_cpu_dma_mem:
3641 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3642 					hdev->cpu_accessible_dma_address);
3643 free_dma_pool:
3644 	dma_pool_destroy(hdev->dma_pool);
3645 free_gaudi2_device:
3646 	kfree(gaudi2);
3647 	return rc;
3648 }
3649 
3650 static int gaudi2_sw_fini(struct hl_device *hdev)
3651 {
3652 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3653 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3654 
3655 	gaudi2_test_queues_msgs_free(hdev);
3656 
3657 	gaudi2_special_blocks_iterator_free(hdev);
3658 
3659 	hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3660 
3661 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3662 
3663 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3664 						hdev->cpu_accessible_dma_address);
3665 
3666 	hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
3667 					gaudi2->scratchpad_bus_address);
3668 
3669 	dma_pool_destroy(hdev->dma_pool);
3670 
3671 	kfree(gaudi2);
3672 
3673 	return 0;
3674 }
3675 
3676 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3677 {
3678 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3679 						QM_GLBL_CFG1_CQF_STOP |
3680 						QM_GLBL_CFG1_CP_STOP);
3681 
3682 	/* stop also the ARC */
3683 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3684 }
3685 
3686 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3687 {
3688 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3689 						QM_GLBL_CFG1_CQF_FLUSH |
3690 						QM_GLBL_CFG1_CP_FLUSH);
3691 }
3692 
3693 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3694 {
3695 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3696 }
3697 
3698 /**
3699  * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3700  *
3701  * @hdev: pointer to the habanalabs device structure
3702  * @queue_id: queue to clear fence counters to
3703  * @skip_fence: if true set maximum fence value to all fence counters to avoid
3704  *              getting stuck on any fence value. otherwise set all fence
3705  *              counters to 0 (standard clear of fence counters)
3706  */
3707 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3708 						bool skip_fence)
3709 {
3710 	u32 size, reg_base;
3711 	u32 addr, val;
3712 
3713 	reg_base = gaudi2_qm_blocks_bases[queue_id];
3714 
3715 	addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3716 	size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3717 
3718 	/*
3719 	 * in case we want to make sure that QM that is stuck on a fence will
3720 	 * be released we should set the fence counter to a higher value that
3721 	 * the value the QM waiting for. to comply with any fence counter of
3722 	 * any value we set maximum fence value to all counters
3723 	 */
3724 	val = skip_fence ? U32_MAX : 0;
3725 	gaudi2_memset_device_lbw(hdev, addr, size, val);
3726 }
3727 
3728 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3729 {
3730 	u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3731 
3732 	gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3733 	gaudi2_flush_qman_common(hdev, reg_base);
3734 	gaudi2_flush_qman_arc_common(hdev, reg_base);
3735 }
3736 
3737 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3738 {
3739 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3740 	int dcore, inst;
3741 
3742 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3743 		goto stop_edma_qmans;
3744 
3745 	/* Stop CPs of PDMA QMANs */
3746 	gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3747 	gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3748 
3749 stop_edma_qmans:
3750 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3751 		return;
3752 
3753 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3754 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3755 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3756 			u32 qm_base;
3757 
3758 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3759 				continue;
3760 
3761 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3762 					inst * DCORE_EDMA_OFFSET;
3763 
3764 			/* Stop CPs of EDMA QMANs */
3765 			gaudi2_stop_qman_common(hdev, qm_base);
3766 		}
3767 	}
3768 }
3769 
3770 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3771 {
3772 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3773 	u32 offset, i;
3774 
3775 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3776 
3777 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3778 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3779 			continue;
3780 
3781 		gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3782 	}
3783 }
3784 
3785 static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3786 {
3787 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3788 	u32 reg_base;
3789 	int i;
3790 
3791 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3792 		return;
3793 
3794 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3795 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3796 			continue;
3797 
3798 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3799 		gaudi2_stop_qman_common(hdev, reg_base);
3800 	}
3801 }
3802 
3803 static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3804 {
3805 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3806 	u32 reg_base;
3807 	int i;
3808 
3809 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3810 		return;
3811 
3812 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3813 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3814 			continue;
3815 
3816 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3817 		gaudi2_stop_qman_common(hdev, reg_base);
3818 	}
3819 }
3820 
3821 static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3822 {
3823 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3824 	u32 reg_base, queue_id;
3825 	int i;
3826 
3827 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3828 		return;
3829 
3830 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3831 
3832 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3833 		if (!(hdev->nic_ports_mask & BIT(i)))
3834 			continue;
3835 
3836 		reg_base = gaudi2_qm_blocks_bases[queue_id];
3837 		gaudi2_stop_qman_common(hdev, reg_base);
3838 	}
3839 }
3840 
3841 static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
3842 {
3843 	u32 reg_val;
3844 
3845 	reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
3846 	WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
3847 }
3848 
3849 static void gaudi2_dma_stall(struct hl_device *hdev)
3850 {
3851 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3852 	int dcore, inst;
3853 
3854 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3855 		goto stall_edma;
3856 
3857 	gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
3858 	gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
3859 
3860 stall_edma:
3861 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3862 		return;
3863 
3864 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3865 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3866 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3867 			u32 core_base;
3868 
3869 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3870 				continue;
3871 
3872 			core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
3873 					inst * DCORE_EDMA_OFFSET;
3874 
3875 			/* Stall CPs of EDMA QMANs */
3876 			gaudi2_stall_dma_common(hdev, core_base);
3877 		}
3878 	}
3879 }
3880 
3881 static void gaudi2_mme_stall(struct hl_device *hdev)
3882 {
3883 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3884 	u32 offset, i;
3885 
3886 	offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
3887 
3888 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3889 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3890 			WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
3891 }
3892 
3893 static void gaudi2_tpc_stall(struct hl_device *hdev)
3894 {
3895 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3896 	u32 reg_base;
3897 	int i;
3898 
3899 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3900 		return;
3901 
3902 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3903 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3904 			continue;
3905 
3906 		reg_base = gaudi2_tpc_cfg_blocks_bases[i];
3907 		WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
3908 	}
3909 }
3910 
3911 static void gaudi2_rotator_stall(struct hl_device *hdev)
3912 {
3913 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3914 	u32 reg_val;
3915 	int i;
3916 
3917 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3918 		return;
3919 
3920 	reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
3921 			FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
3922 			FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
3923 
3924 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3925 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3926 			continue;
3927 
3928 		WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
3929 	}
3930 }
3931 
3932 static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
3933 {
3934 	WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
3935 }
3936 
3937 static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
3938 {
3939 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3940 	int dcore, inst;
3941 
3942 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3943 		goto stop_edma_qmans;
3944 
3945 	gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
3946 	gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
3947 
3948 stop_edma_qmans:
3949 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3950 		return;
3951 
3952 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3953 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3954 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3955 			u32 qm_base;
3956 
3957 			if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3958 				continue;
3959 
3960 			qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3961 					inst * DCORE_EDMA_OFFSET;
3962 
3963 			/* Disable CPs of EDMA QMANs */
3964 			gaudi2_disable_qman_common(hdev, qm_base);
3965 		}
3966 	}
3967 }
3968 
3969 static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
3970 {
3971 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3972 	u32 offset, i;
3973 
3974 	offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3975 
3976 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
3977 		if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
3978 			gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3979 }
3980 
3981 static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
3982 {
3983 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
3984 	u32 reg_base;
3985 	int i;
3986 
3987 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3988 		return;
3989 
3990 	for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3991 		if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3992 			continue;
3993 
3994 		reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3995 		gaudi2_disable_qman_common(hdev, reg_base);
3996 	}
3997 }
3998 
3999 static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4000 {
4001 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4002 	u32 reg_base;
4003 	int i;
4004 
4005 	if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4006 		return;
4007 
4008 	for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4009 		if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4010 			continue;
4011 
4012 		reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4013 		gaudi2_disable_qman_common(hdev, reg_base);
4014 	}
4015 }
4016 
4017 static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4018 {
4019 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4020 	u32 reg_base, queue_id;
4021 	int i;
4022 
4023 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4024 		return;
4025 
4026 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4027 
4028 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4029 		if (!(hdev->nic_ports_mask & BIT(i)))
4030 			continue;
4031 
4032 		reg_base = gaudi2_qm_blocks_bases[queue_id];
4033 		gaudi2_disable_qman_common(hdev, reg_base);
4034 	}
4035 }
4036 
4037 static void gaudi2_enable_timestamp(struct hl_device *hdev)
4038 {
4039 	/* Disable the timestamp counter */
4040 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4041 
4042 	/* Zero the lower/upper parts of the 64-bit counter */
4043 	WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4044 	WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4045 
4046 	/* Enable the counter */
4047 	WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4048 }
4049 
4050 static void gaudi2_disable_timestamp(struct hl_device *hdev)
4051 {
4052 	/* Disable the timestamp counter */
4053 	WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4054 }
4055 
4056 static const char *gaudi2_irq_name(u16 irq_number)
4057 {
4058 	switch (irq_number) {
4059 	case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4060 		return "gaudi2 cpu eq";
4061 	case GAUDI2_IRQ_NUM_COMPLETION:
4062 		return "gaudi2 completion";
4063 	case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4064 		return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4065 	case GAUDI2_IRQ_NUM_TPC_ASSERT:
4066 		return "gaudi2 tpc assert";
4067 	case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4068 		return "gaudi2 unexpected error";
4069 	case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4070 		return "gaudi2 user completion";
4071 	default:
4072 		return "invalid";
4073 	}
4074 }
4075 
4076 static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4077 {
4078 	int i, irq, relative_idx;
4079 	struct hl_dec *dec;
4080 
4081 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4082 		irq = pci_irq_vector(hdev->pdev, i);
4083 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4084 
4085 		dec = hdev->dec + relative_idx / 2;
4086 
4087 		/* We pass different structures depending on the irq handler. For the abnormal
4088 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4089 		 * user_interrupt entry
4090 		 */
4091 		free_irq(irq, ((relative_idx % 2) ?
4092 				(void *) dec :
4093 				(void *) &hdev->user_interrupt[dec->core_id]));
4094 	}
4095 }
4096 
4097 static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4098 {
4099 	int rc, i, irq_init_cnt, irq, relative_idx;
4100 	struct hl_dec *dec;
4101 
4102 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4103 			i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4104 			i++, irq_init_cnt++) {
4105 
4106 		irq = pci_irq_vector(hdev->pdev, i);
4107 		relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4108 
4109 		/* We pass different structures depending on the irq handler. For the abnormal
4110 		 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4111 		 * user_interrupt entry
4112 		 *
4113 		 * TODO: change the dec abnrm to threaded irq
4114 		 */
4115 
4116 		dec = hdev->dec + relative_idx / 2;
4117 		if (relative_idx % 2) {
4118 			rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4119 						gaudi2_irq_name(i), (void *) dec);
4120 		} else {
4121 			rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4122 					hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4123 					gaudi2_irq_name(i),
4124 					(void *) &hdev->user_interrupt[dec->core_id]);
4125 		}
4126 
4127 		if (rc) {
4128 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4129 			goto free_dec_irqs;
4130 		}
4131 	}
4132 
4133 	return 0;
4134 
4135 free_dec_irqs:
4136 	gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4137 	return rc;
4138 }
4139 
4140 static int gaudi2_enable_msix(struct hl_device *hdev)
4141 {
4142 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4143 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4144 	int rc, irq, i, j, user_irq_init_cnt;
4145 	struct hl_cq *cq;
4146 
4147 	if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4148 		return 0;
4149 
4150 	rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4151 					PCI_IRQ_MSIX);
4152 	if (rc < 0) {
4153 		dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4154 			GAUDI2_MSIX_ENTRIES, rc);
4155 		return rc;
4156 	}
4157 
4158 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4159 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4160 	rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4161 	if (rc) {
4162 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4163 		goto free_irq_vectors;
4164 	}
4165 
4166 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4167 	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4168 			&hdev->event_queue);
4169 	if (rc) {
4170 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4171 		goto free_completion_irq;
4172 	}
4173 
4174 	rc = gaudi2_dec_enable_msix(hdev);
4175 	if (rc) {
4176 		dev_err(hdev->dev, "Failed to enable decoder IRQ");
4177 		goto free_event_irq;
4178 	}
4179 
4180 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4181 	rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4182 			hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4183 			gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt);
4184 	if (rc) {
4185 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4186 		goto free_dec_irq;
4187 	}
4188 
4189 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4190 	rc = request_irq(irq, hl_irq_handler_user_interrupt, 0,
4191 			gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4192 					&hdev->unexpected_error_interrupt);
4193 	if (rc) {
4194 		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4195 		goto free_tpc_irq;
4196 	}
4197 
4198 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4199 			user_irq_init_cnt < prop->user_interrupt_count;
4200 			i++, j++, user_irq_init_cnt++) {
4201 
4202 		irq = pci_irq_vector(hdev->pdev, i);
4203 		rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt,
4204 						hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4205 						gaudi2_irq_name(i), &hdev->user_interrupt[j]);
4206 
4207 		if (rc) {
4208 			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4209 			goto free_user_irq;
4210 		}
4211 	}
4212 
4213 	gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4214 
4215 	return 0;
4216 
4217 free_user_irq:
4218 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4219 			i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4220 
4221 		irq = pci_irq_vector(hdev->pdev, i);
4222 		free_irq(irq, &hdev->user_interrupt[j]);
4223 	}
4224 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4225 	free_irq(irq, &hdev->unexpected_error_interrupt);
4226 free_tpc_irq:
4227 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4228 	free_irq(irq, &hdev->tpc_interrupt);
4229 free_dec_irq:
4230 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
4231 free_event_irq:
4232 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4233 	free_irq(irq, cq);
4234 
4235 free_completion_irq:
4236 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4237 	free_irq(irq, cq);
4238 
4239 free_irq_vectors:
4240 	pci_free_irq_vectors(hdev->pdev);
4241 
4242 	return rc;
4243 }
4244 
4245 static void gaudi2_sync_irqs(struct hl_device *hdev)
4246 {
4247 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4248 	int i, j;
4249 	int irq;
4250 
4251 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4252 		return;
4253 
4254 	/* Wait for all pending IRQs to be finished */
4255 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4256 
4257 	for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4258 		irq = pci_irq_vector(hdev->pdev, i);
4259 		synchronize_irq(irq);
4260 	}
4261 
4262 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4263 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4264 
4265 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4266 										i++, j++) {
4267 		irq = pci_irq_vector(hdev->pdev, i);
4268 		synchronize_irq(irq);
4269 	}
4270 
4271 	synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4272 }
4273 
4274 static void gaudi2_disable_msix(struct hl_device *hdev)
4275 {
4276 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4277 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4278 	struct hl_cq *cq;
4279 	int irq, i, j, k;
4280 
4281 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4282 		return;
4283 
4284 	gaudi2_sync_irqs(hdev);
4285 
4286 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4287 	free_irq(irq, &hdev->event_queue);
4288 
4289 	gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4290 
4291 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4292 	free_irq(irq, &hdev->tpc_interrupt);
4293 
4294 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4295 	free_irq(irq, &hdev->unexpected_error_interrupt);
4296 
4297 	for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4298 			k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4299 
4300 		irq = pci_irq_vector(hdev->pdev, i);
4301 		free_irq(irq, &hdev->user_interrupt[j]);
4302 	}
4303 
4304 	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4305 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4306 	free_irq(irq, cq);
4307 
4308 	pci_free_irq_vectors(hdev->pdev);
4309 
4310 	gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4311 }
4312 
4313 static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4314 {
4315 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4316 	u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4317 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4318 	int rc;
4319 
4320 	if (hdev->pldm)
4321 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4322 	else
4323 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4324 
4325 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4326 		dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4327 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4328 			continue;
4329 
4330 		offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4331 
4332 		WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4333 
4334 		WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4335 
4336 		/* Wait till all traffic from decoder stops
4337 		 * before apply core reset.
4338 		 */
4339 		rc = hl_poll_timeout(
4340 				hdev,
4341 				mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4342 				graceful,
4343 				(graceful & graceful_pend_mask),
4344 				100,
4345 				timeout_usec);
4346 		if (rc)
4347 			dev_err(hdev->dev,
4348 				"Failed to stop traffic from DCORE%d Decoder %d\n",
4349 				dcore_id, dec_id);
4350 	}
4351 }
4352 
4353 static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4354 {
4355 	u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4356 	u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4357 	u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4358 	int rc;
4359 
4360 	if (hdev->pldm)
4361 		timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4362 	else
4363 		timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4364 
4365 	for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4366 		dec_bit = PCIE_DEC_SHIFT + dec_id;
4367 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4368 			continue;
4369 
4370 		offset = dec_id * PCIE_VDEC_OFFSET;
4371 
4372 		WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4373 
4374 		WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4375 
4376 		/* Wait till all traffic from decoder stops
4377 		 * before apply core reset.
4378 		 */
4379 		rc = hl_poll_timeout(
4380 				hdev,
4381 				mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4382 				graceful,
4383 				(graceful & graceful_pend_mask),
4384 				100,
4385 				timeout_usec);
4386 		if (rc)
4387 			dev_err(hdev->dev,
4388 				"Failed to stop traffic from PCIe Decoder %d\n",
4389 				dec_id);
4390 	}
4391 }
4392 
4393 static void gaudi2_stop_dec(struct hl_device *hdev)
4394 {
4395 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4396 	int dcore_id;
4397 
4398 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4399 		return;
4400 
4401 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4402 		gaudi2_stop_dcore_dec(hdev, dcore_id);
4403 
4404 	gaudi2_stop_pcie_dec(hdev);
4405 }
4406 
4407 static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4408 {
4409 	u32 reg_base, reg_val;
4410 
4411 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4412 	if (run_mode == HL_ENGINE_CORE_RUN)
4413 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4414 	else
4415 		reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4416 
4417 	WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4418 }
4419 
4420 static void gaudi2_halt_arcs(struct hl_device *hdev)
4421 {
4422 	u16 arc_id;
4423 
4424 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4425 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4426 			gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4427 	}
4428 }
4429 
4430 static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4431 {
4432 	int rc;
4433 	u32 reg_base, val, ack_mask, timeout_usec = 100000;
4434 
4435 	if (hdev->pldm)
4436 		timeout_usec *= 100;
4437 
4438 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
4439 	if (run_mode == HL_ENGINE_CORE_RUN)
4440 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4441 	else
4442 		ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4443 
4444 	rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4445 				val, ((val & ack_mask) == ack_mask),
4446 				1000, timeout_usec);
4447 
4448 	if (!rc) {
4449 		/* Clear */
4450 		val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4451 		WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4452 	}
4453 
4454 	return rc;
4455 }
4456 
4457 static void gaudi2_reset_arcs(struct hl_device *hdev)
4458 {
4459 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4460 	u16 arc_id;
4461 
4462 	if (!gaudi2)
4463 		return;
4464 
4465 	for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4466 		if (gaudi2_is_arc_enabled(hdev, arc_id))
4467 			gaudi2_clr_arc_id_cap(hdev, arc_id);
4468 }
4469 
4470 static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4471 {
4472 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4473 	u32 queue_id;
4474 	int i;
4475 
4476 	if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4477 		return;
4478 
4479 	queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4480 
4481 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4482 		if (!(hdev->nic_ports_mask & BIT(i)))
4483 			continue;
4484 
4485 		gaudi2_qman_manual_flush_common(hdev, queue_id);
4486 	}
4487 }
4488 
4489 static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
4490 					u32 num_cores, u32 core_command)
4491 {
4492 	int i, rc;
4493 
4494 	for (i = 0 ; i < num_cores ; i++) {
4495 		if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4496 			gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4497 	}
4498 
4499 	for (i = 0 ; i < num_cores ; i++) {
4500 		if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4501 			rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4502 
4503 			if (rc) {
4504 				dev_err(hdev->dev, "failed to %s arc: %d\n",
4505 					(core_command == HL_ENGINE_CORE_HALT) ?
4506 					"HALT" : "RUN", core_ids[i]);
4507 				return -1;
4508 			}
4509 		}
4510 	}
4511 
4512 	return 0;
4513 }
4514 
4515 static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4516 {
4517 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4518 	u32 reg_base, reg_addr, reg_val, tpc_id;
4519 
4520 	if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4521 		return 0;
4522 
4523 	tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4524 	if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4525 		return 0;
4526 
4527 	reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4528 	reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4529 	reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4530 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4531 	WREG32(reg_addr, reg_val);
4532 
4533 	if (engine_command == HL_ENGINE_RESUME) {
4534 		reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4535 		reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4536 		RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4537 	}
4538 
4539 	return 0;
4540 }
4541 
4542 static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4543 {
4544 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4545 	u32 reg_base, reg_addr, reg_val, mme_id;
4546 
4547 	mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4548 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4549 		return 0;
4550 
4551 	reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4552 	reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4553 	reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4554 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4555 	WREG32(reg_addr, reg_val);
4556 
4557 	return 0;
4558 }
4559 
4560 static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4561 {
4562 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4563 	u32 reg_base, reg_addr, reg_val, edma_id;
4564 
4565 	if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4566 		return 0;
4567 
4568 	edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4569 	if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4570 		return 0;
4571 
4572 	reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4573 	reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4574 	reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4575 			(engine_command == HL_ENGINE_STALL) ? 1 : 0);
4576 	WREG32(reg_addr, reg_val);
4577 
4578 	if (engine_command == HL_ENGINE_STALL) {
4579 		reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4580 				FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4581 		WREG32(reg_addr, reg_val);
4582 	}
4583 
4584 	return 0;
4585 }
4586 
4587 static int gaudi2_set_engine_modes(struct hl_device *hdev,
4588 		u32 *engine_ids, u32 num_engines, u32 engine_command)
4589 {
4590 	int i, rc;
4591 
4592 	for (i = 0 ; i < num_engines ; ++i) {
4593 		switch (engine_ids[i]) {
4594 		case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4595 		case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4596 		case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4597 		case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4598 			rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4599 			if (rc)
4600 				return rc;
4601 
4602 			break;
4603 		case GAUDI2_DCORE0_ENGINE_ID_MME:
4604 		case GAUDI2_DCORE1_ENGINE_ID_MME:
4605 		case GAUDI2_DCORE2_ENGINE_ID_MME:
4606 		case GAUDI2_DCORE3_ENGINE_ID_MME:
4607 			rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4608 			if (rc)
4609 				return rc;
4610 
4611 			break;
4612 		case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4613 		case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4614 		case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4615 		case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4616 			rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4617 			if (rc)
4618 				return rc;
4619 
4620 			break;
4621 		default:
4622 			dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4623 			return -EINVAL;
4624 		}
4625 	}
4626 
4627 	return 0;
4628 }
4629 
4630 static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
4631 					u32 num_engines, u32 engine_command)
4632 {
4633 	switch (engine_command) {
4634 	case HL_ENGINE_CORE_HALT:
4635 	case HL_ENGINE_CORE_RUN:
4636 		return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4637 
4638 	case HL_ENGINE_STALL:
4639 	case HL_ENGINE_RESUME:
4640 		return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4641 
4642 	default:
4643 		dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4644 		return -EINVAL;
4645 	}
4646 }
4647 
4648 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4649 {
4650 	u32 wait_timeout_ms;
4651 
4652 	if (hdev->pldm)
4653 		wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4654 	else
4655 		wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4656 
4657 	if (fw_reset)
4658 		goto skip_engines;
4659 
4660 	gaudi2_stop_dma_qmans(hdev);
4661 	gaudi2_stop_mme_qmans(hdev);
4662 	gaudi2_stop_tpc_qmans(hdev);
4663 	gaudi2_stop_rot_qmans(hdev);
4664 	gaudi2_stop_nic_qmans(hdev);
4665 	msleep(wait_timeout_ms);
4666 
4667 	gaudi2_halt_arcs(hdev);
4668 	gaudi2_dma_stall(hdev);
4669 	gaudi2_mme_stall(hdev);
4670 	gaudi2_tpc_stall(hdev);
4671 	gaudi2_rotator_stall(hdev);
4672 
4673 	msleep(wait_timeout_ms);
4674 
4675 	gaudi2_stop_dec(hdev);
4676 
4677 	/*
4678 	 * in case of soft reset do a manual flush for QMANs (currently called
4679 	 * only for NIC QMANs
4680 	 */
4681 	if (!hard_reset)
4682 		gaudi2_nic_qmans_manual_flush(hdev);
4683 
4684 	gaudi2_disable_dma_qmans(hdev);
4685 	gaudi2_disable_mme_qmans(hdev);
4686 	gaudi2_disable_tpc_qmans(hdev);
4687 	gaudi2_disable_rot_qmans(hdev);
4688 	gaudi2_disable_nic_qmans(hdev);
4689 	gaudi2_disable_timestamp(hdev);
4690 
4691 skip_engines:
4692 	if (hard_reset) {
4693 		gaudi2_disable_msix(hdev);
4694 		return;
4695 	}
4696 
4697 	gaudi2_sync_irqs(hdev);
4698 }
4699 
4700 static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4701 {
4702 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4703 
4704 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4705 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4706 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4707 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4708 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4709 	pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4710 }
4711 
4712 static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4713 {
4714 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4715 	struct dynamic_fw_load_mgr *dynamic_loader;
4716 	struct cpu_dyn_regs *dyn_regs;
4717 
4718 	/* fill common fields */
4719 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4720 	fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4721 	fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4722 	fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4723 	fw_loader->skip_bmc = false;
4724 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4725 	fw_loader->dram_bar_id = DRAM_BAR_ID;
4726 	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4727 
4728 	/* here we update initial values for few specific dynamic regs (as
4729 	 * before reading the first descriptor from FW those value has to be
4730 	 * hard-coded). in later stages of the protocol those values will be
4731 	 * updated automatically by reading the FW descriptor so data there
4732 	 * will always be up-to-date
4733 	 */
4734 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
4735 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4736 	dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4737 	dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4738 	dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4739 }
4740 
4741 static int gaudi2_init_cpu(struct hl_device *hdev)
4742 {
4743 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4744 	int rc;
4745 
4746 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4747 		return 0;
4748 
4749 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4750 		return 0;
4751 
4752 	rc = hl_fw_init_cpu(hdev);
4753 	if (rc)
4754 		return rc;
4755 
4756 	gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4757 
4758 	return 0;
4759 }
4760 
4761 static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4762 {
4763 	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4764 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4765 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4766 	struct cpu_dyn_regs *dyn_regs;
4767 	struct hl_eq *eq;
4768 	u32 status;
4769 	int err;
4770 
4771 	if (!hdev->cpu_queues_enable)
4772 		return 0;
4773 
4774 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4775 		return 0;
4776 
4777 	eq = &hdev->event_queue;
4778 
4779 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4780 
4781 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4782 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4783 
4784 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4785 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4786 
4787 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4788 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4789 
4790 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4791 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4792 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4793 
4794 	/* Used for EQ CI */
4795 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4796 
4797 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
4798 
4799 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4800 
4801 	/* Let the ARC know we are ready as it is now handling those queues  */
4802 
4803 	WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4804 		gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4805 
4806 	err = hl_poll_timeout(
4807 		hdev,
4808 		mmCPU_IF_QUEUE_INIT,
4809 		status,
4810 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
4811 		1000,
4812 		cpu_timeout);
4813 
4814 	if (err) {
4815 		dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
4816 		return -EIO;
4817 	}
4818 
4819 	/* update FW application security bits */
4820 	if (prop->fw_cpu_boot_dev_sts0_valid)
4821 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4822 
4823 	if (prop->fw_cpu_boot_dev_sts1_valid)
4824 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4825 
4826 	gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
4827 	return 0;
4828 }
4829 
4830 static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
4831 				u32 queue_id_base)
4832 {
4833 	struct hl_hw_queue *q;
4834 	u32 pq_id, pq_offset;
4835 
4836 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4837 		q = &hdev->kernel_queues[queue_id_base + pq_id];
4838 		pq_offset = pq_id * 4;
4839 
4840 		WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
4841 				lower_32_bits(q->bus_address));
4842 		WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
4843 				upper_32_bits(q->bus_address));
4844 		WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
4845 		WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
4846 		WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
4847 	}
4848 }
4849 
4850 static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
4851 {
4852 	u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
4853 
4854 	mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4855 	mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
4856 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4857 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4858 
4859 	for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
4860 		cp_offset = cp_id * 4;
4861 
4862 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
4863 		WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset,	mtr_base_hi);
4864 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset,	so_base_lo);
4865 		WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset,	so_base_hi);
4866 	}
4867 
4868 	/* allow QMANs to accept work from ARC CQF */
4869 	WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
4870 }
4871 
4872 static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
4873 				u32 queue_id_base)
4874 {
4875 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
4876 	u32 pq_id, pq_offset, so_base_lo, so_base_hi;
4877 
4878 	so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4879 	so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
4880 
4881 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
4882 		pq_offset = pq_id * 4;
4883 
4884 		/* Configure QMAN HBW to scratchpad as it is not needed */
4885 		WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
4886 				lower_32_bits(gaudi2->scratchpad_bus_address));
4887 		WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
4888 				upper_32_bits(gaudi2->scratchpad_bus_address));
4889 		WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
4890 				ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
4891 
4892 		WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
4893 		WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
4894 		WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
4895 		WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
4896 	}
4897 
4898 	/* Enable QMAN H/W completion */
4899 	WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
4900 }
4901 
4902 static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
4903 {
4904 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4905 	u32 sp_reg_addr;
4906 
4907 	switch (queue_id_base) {
4908 	case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
4909 		fallthrough;
4910 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
4911 		fallthrough;
4912 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
4913 		fallthrough;
4914 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
4915 		fallthrough;
4916 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
4917 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
4918 		break;
4919 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
4920 		fallthrough;
4921 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
4922 		fallthrough;
4923 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
4924 		fallthrough;
4925 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
4926 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
4927 		break;
4928 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
4929 		fallthrough;
4930 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
4931 		fallthrough;
4932 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
4933 		fallthrough;
4934 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
4935 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
4936 		break;
4937 	case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
4938 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
4939 		break;
4940 	case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
4941 		sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
4942 		break;
4943 	default:
4944 		dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
4945 		return 0;
4946 	}
4947 
4948 	return sp_reg_addr;
4949 }
4950 
4951 static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
4952 					u32 queue_id_base)
4953 {
4954 	u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
4955 	int map_table_entry;
4956 
4957 	WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
4958 
4959 	irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
4960 	WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
4961 	WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
4962 
4963 	map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
4964 	WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
4965 		gaudi2_irq_map_table[map_table_entry].cpu_id);
4966 
4967 	WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
4968 
4969 	WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
4970 	WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
4971 	WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
4972 
4973 	/* Enable the QMAN channel.
4974 	 * PDMA QMAN configuration is different, as we do not allow user to
4975 	 * access some of the CPs.
4976 	 * PDMA0: CP2/3 are reserved for the ARC usage.
4977 	 * PDMA1: CP1/2/3 are reserved for the ARC usage.
4978 	 */
4979 	if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
4980 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
4981 	else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
4982 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
4983 	else
4984 		WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
4985 }
4986 
4987 static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
4988 		u32 queue_id_base)
4989 {
4990 	u32 pq_id;
4991 
4992 	for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
4993 		hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
4994 
4995 	gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
4996 	gaudi2_init_qman_cp(hdev, reg_base);
4997 	gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
4998 	gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
4999 }
5000 
5001 static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
5002 				u32 dma_core_id, bool is_secure)
5003 {
5004 	u32 prot, irq_handler_offset;
5005 	struct cpu_dyn_regs *dyn_regs;
5006 	int map_table_entry;
5007 
5008 	prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5009 	if (is_secure)
5010 		prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5011 
5012 	WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5013 
5014 	dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5015 	irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5016 
5017 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5018 			lower_32_bits(CFG_BASE + irq_handler_offset));
5019 
5020 	WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5021 			upper_32_bits(CFG_BASE + irq_handler_offset));
5022 
5023 	map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5024 	WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5025 		gaudi2_irq_map_table[map_table_entry].cpu_id);
5026 
5027 	/* Enable the DMA channel */
5028 	WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5029 }
5030 
5031 static void gaudi2_init_kdma(struct hl_device *hdev)
5032 {
5033 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5034 	u32 reg_base;
5035 
5036 	if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5037 		return;
5038 
5039 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5040 
5041 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5042 
5043 	gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5044 }
5045 
5046 static void gaudi2_init_pdma(struct hl_device *hdev)
5047 {
5048 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5049 	u32 reg_base;
5050 
5051 	if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5052 		return;
5053 
5054 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5055 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5056 
5057 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5058 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5059 
5060 	reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5061 	gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5062 
5063 	reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5064 	gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5065 
5066 	gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5067 }
5068 
5069 static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5070 {
5071 	u32 reg_base, base_edma_core_id, base_edma_qman_id;
5072 
5073 	base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5074 	base_edma_qman_id = edma_stream_base[seq];
5075 
5076 	reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5077 	gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5078 
5079 	reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5080 	gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5081 }
5082 
5083 static void gaudi2_init_edma(struct hl_device *hdev)
5084 {
5085 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5086 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5087 	int dcore, inst;
5088 
5089 	if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5090 		return;
5091 
5092 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5093 		for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5094 			u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5095 
5096 			if (!(prop->edma_enabled_mask & BIT(seq)))
5097 				continue;
5098 
5099 			gaudi2_init_edma_instance(hdev, seq);
5100 
5101 			gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5102 		}
5103 	}
5104 }
5105 
5106 /*
5107  * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5108  * @hdev: pointer to habanalabs device structure.
5109  * @sob_id: sync object ID.
5110  * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5111  * @interrupt_id: interrupt ID.
5112  *
5113  * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5114  * write directly to the HBW host memory of the virtual MSI-X doorbell.
5115  * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5116  *
5117  * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5118  * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5119  * completion, by decrementing the sync object value and re-arming the monitor.
5120  */
5121 static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
5122 							u32 first_mon_id, u32 interrupt_id)
5123 {
5124 	u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5125 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5126 	u64 addr;
5127 	u8 mask;
5128 
5129 	/* Reset the SOB value */
5130 	sob_offset = sob_id * sizeof(u32);
5131 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5132 
5133 	/* Configure 3 monitors:
5134 	 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5135 	 * 2. Decrement SOB value by 1.
5136 	 * 3. Re-arm the master monitor.
5137 	 */
5138 
5139 	first_mon_offset = first_mon_id * sizeof(u32);
5140 
5141 	/* 2nd monitor: Decrement SOB value by 1 */
5142 	mon_offset = first_mon_offset + sizeof(u32);
5143 
5144 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5145 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5146 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5147 
5148 	payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5149 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5150 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5151 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5152 
5153 	/* 3rd monitor: Re-arm the master monitor */
5154 	mon_offset = first_mon_offset + 2 * sizeof(u32);
5155 
5156 	addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5157 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5158 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5159 
5160 	sob_group = sob_id / 8;
5161 	mask = ~BIT(sob_id & 0x7);
5162 	mode = 0; /* comparison mode is "greater than or equal to" */
5163 	arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5164 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5165 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5166 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5167 
5168 	payload = arm;
5169 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5170 
5171 	/* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5172 	mon_offset = first_mon_offset;
5173 
5174 	config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5175 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5176 
5177 	addr = gaudi2->virt_msix_db_dma_addr;
5178 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5179 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5180 
5181 	payload = interrupt_id;
5182 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5183 
5184 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5185 }
5186 
5187 static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5188 {
5189 	u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5190 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5191 
5192 	/* Decoder normal/abnormal interrupts */
5193 	for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5194 		if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5195 			continue;
5196 
5197 		sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5198 		first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5199 		interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5200 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5201 
5202 		sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5203 		first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5204 		interrupt_id += 1;
5205 		gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5206 	}
5207 }
5208 
5209 static void gaudi2_init_sm(struct hl_device *hdev)
5210 {
5211 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5212 	u64 cq_address;
5213 	u32 reg_val;
5214 	int i;
5215 
5216 	/* Enable HBW/LBW CQ for completion monitors */
5217 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5218 	reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5219 
5220 	for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5221 		WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5222 
5223 	/* Enable only HBW CQ for KDMA completion monitor */
5224 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5225 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5226 
5227 	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5228 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5229 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5230 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5231 
5232 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5233 		cq_address =
5234 			hdev->completion_queue[i].bus_address;
5235 
5236 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5237 							lower_32_bits(cq_address));
5238 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5239 							upper_32_bits(cq_address));
5240 		WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5241 							ilog2(HL_CQ_SIZE_IN_BYTES));
5242 	}
5243 
5244 	/* Configure kernel ASID and MMU BP*/
5245 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5246 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5247 
5248 	/* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5249 	gaudi2_prepare_sm_for_virt_msix_db(hdev);
5250 }
5251 
5252 static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5253 {
5254 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5255 	u32 reg_val;
5256 	int i;
5257 
5258 	reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5259 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5260 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5261 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5262 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5263 	reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5264 
5265 	WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5266 	WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5267 
5268 	for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5269 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5270 		WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5271 	}
5272 }
5273 
5274 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
5275 							bool config_qman_only)
5276 {
5277 	u32 queue_id_base, reg_base;
5278 
5279 	switch (dcore_id) {
5280 	case 0:
5281 		queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5282 		break;
5283 	case 1:
5284 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5285 		break;
5286 	case 2:
5287 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5288 		break;
5289 	case 3:
5290 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5291 		break;
5292 	default:
5293 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5294 		return;
5295 	}
5296 
5297 	if (!config_qman_only) {
5298 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5299 		gaudi2_init_mme_acc(hdev, reg_base);
5300 	}
5301 
5302 	reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5303 	gaudi2_init_qman(hdev, reg_base, queue_id_base);
5304 }
5305 
5306 static void gaudi2_init_mme(struct hl_device *hdev)
5307 {
5308 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5309 	int i;
5310 
5311 	if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5312 		return;
5313 
5314 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5315 		gaudi2_init_dcore_mme(hdev, i, false);
5316 
5317 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5318 	}
5319 }
5320 
5321 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5322 {
5323 	/* Mask arithmetic and QM interrupts in TPC */
5324 	WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5325 
5326 	/* Set 16 cache lines */
5327 	WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5328 			2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5329 }
5330 
5331 struct gaudi2_tpc_init_cfg_data {
5332 	enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5333 };
5334 
5335 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5336 					u32 offset, struct iterate_module_ctx *ctx)
5337 {
5338 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5339 	struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5340 	u32 queue_id_base;
5341 	u8 seq;
5342 
5343 	queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5344 
5345 	if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5346 		/* gets last sequence number */
5347 		seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5348 	else
5349 		seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5350 
5351 	gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5352 	gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5353 
5354 	gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5355 }
5356 
5357 static void gaudi2_init_tpc(struct hl_device *hdev)
5358 {
5359 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5360 	struct gaudi2_tpc_init_cfg_data init_cfg_data;
5361 	struct iterate_module_ctx tpc_iter;
5362 
5363 	if (!hdev->asic_prop.tpc_enabled_mask)
5364 		return;
5365 
5366 	if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5367 		return;
5368 
5369 	init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5370 	init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5371 	init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5372 	init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5373 	tpc_iter.fn = &gaudi2_init_tpc_config;
5374 	tpc_iter.data = &init_cfg_data;
5375 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
5376 }
5377 
5378 static void gaudi2_init_rotator(struct hl_device *hdev)
5379 {
5380 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5381 	u32 i, reg_base, queue_id;
5382 
5383 	queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5384 
5385 	for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5386 		reg_base = gaudi2_qm_blocks_bases[queue_id];
5387 		gaudi2_init_qman(hdev, reg_base, queue_id);
5388 
5389 		gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5390 	}
5391 }
5392 
5393 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5394 {
5395 	u32 sob_id;
5396 
5397 	/* VCMD normal interrupt */
5398 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5399 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5400 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5401 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5402 
5403 	/* VCMD abnormal interrupt */
5404 	sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5405 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5406 			mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5407 	WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5408 }
5409 
5410 static void gaudi2_init_dec(struct hl_device *hdev)
5411 {
5412 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5413 	u32 dcore_id, dec_id, dec_bit;
5414 	u64 base_addr;
5415 
5416 	if (!hdev->asic_prop.decoder_enabled_mask)
5417 		return;
5418 
5419 	if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5420 		return;
5421 
5422 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5423 		for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5424 			dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5425 
5426 			if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5427 				continue;
5428 
5429 			base_addr =  mmDCORE0_DEC0_CMD_BASE +
5430 					BRDG_CTRL_BLOCK_OFFSET +
5431 					dcore_id * DCORE_OFFSET +
5432 					dec_id * DCORE_VDEC_OFFSET;
5433 
5434 			gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5435 
5436 			gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5437 		}
5438 
5439 	for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5440 		dec_bit = PCIE_DEC_SHIFT + dec_id;
5441 		if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5442 			continue;
5443 
5444 		base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5445 				dec_id * DCORE_VDEC_OFFSET;
5446 
5447 		gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5448 
5449 		gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5450 	}
5451 }
5452 
5453 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5454 					u32 stlb_base, u32 asid, u64 phys_addr)
5455 {
5456 	u32 status, timeout_usec;
5457 	int rc;
5458 
5459 	if (hdev->pldm || !hdev->pdev)
5460 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5461 	else
5462 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5463 
5464 	WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5465 	WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5466 	WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5467 	WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5468 
5469 	rc = hl_poll_timeout(
5470 		hdev,
5471 		stlb_base + STLB_BUSY_OFFSET,
5472 		status,
5473 		!(status & 0x80000000),
5474 		1000,
5475 		timeout_usec);
5476 
5477 	if (rc) {
5478 		dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5479 		return rc;
5480 	}
5481 
5482 	return 0;
5483 }
5484 
5485 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5486 					u32 start_offset, u32 inv_start_val,
5487 					u32 flags)
5488 {
5489 	/* clear PMMU mem line cache (only needed in mmu range invalidation) */
5490 	if (flags & MMU_OP_CLEAR_MEMCACHE)
5491 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5492 
5493 	if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5494 		return;
5495 
5496 	WREG32(stlb_base + start_offset, inv_start_val);
5497 }
5498 
5499 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5500 						struct gaudi2_cache_invld_params *inv_params)
5501 {
5502 	u32 status, timeout_usec, start_offset;
5503 	int rc;
5504 
5505 	timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5506 					GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5507 
5508 	/* poll PMMU mem line cache (only needed in mmu range invalidation) */
5509 	if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5510 		rc = hl_poll_timeout(
5511 			hdev,
5512 			mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5513 			status,
5514 			status & 0x1,
5515 			1000,
5516 			timeout_usec);
5517 
5518 		if (rc)
5519 			return rc;
5520 
5521 		/* Need to manually reset the status to 0 */
5522 		WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5523 	}
5524 
5525 	/* Lower cache does not work with cache lines, hence we can skip its
5526 	 * invalidation upon map and invalidate only upon unmap
5527 	 */
5528 	if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5529 		return 0;
5530 
5531 	start_offset = inv_params->range_invalidation ?
5532 			STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5533 
5534 	rc = hl_poll_timeout(
5535 		hdev,
5536 		stlb_base + start_offset,
5537 		status,
5538 		!(status & 0x1),
5539 		1000,
5540 		timeout_usec);
5541 
5542 	return rc;
5543 }
5544 
5545 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5546 {
5547 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5548 	u32 hw_cap;
5549 
5550 	hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5551 
5552 	if (gaudi2->hw_cap_initialized & hw_cap)
5553 		return true;
5554 
5555 	return false;
5556 }
5557 
5558 /* this function shall be called only for HMMUs for which capability bit is set */
5559 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5560 {
5561 	u32 offset;
5562 
5563 	offset =  (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5564 	return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5565 }
5566 
5567 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5568 						struct gaudi2_cache_invld_params *inv_params)
5569 {
5570 	u32 start_offset;
5571 
5572 	if (inv_params->range_invalidation) {
5573 		/* Set the addresses range
5574 		 * Note: that the start address we set in register, is not included in
5575 		 * the range of the invalidation, by design.
5576 		 * that's why we need to set lower address than the one we actually
5577 		 * want to be included in the range invalidation.
5578 		 */
5579 		u64 start = inv_params->start_va - 1;
5580 
5581 		start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5582 
5583 		WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5584 				start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5585 
5586 		WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5587 				start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5588 
5589 		WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5590 				inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5591 
5592 		WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5593 				inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5594 	} else {
5595 		start_offset = STLB_INV_ALL_START_OFFSET;
5596 	}
5597 
5598 	gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5599 						inv_params->inv_start_val, inv_params->flags);
5600 }
5601 
5602 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5603 						int dcore_id, int hmmu_id,
5604 						struct gaudi2_cache_invld_params *inv_params)
5605 {
5606 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5607 
5608 	gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5609 }
5610 
5611 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5612 						int dcore_id, int hmmu_id,
5613 						struct gaudi2_cache_invld_params *inv_params)
5614 {
5615 	u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5616 
5617 	return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5618 }
5619 
5620 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5621 						struct gaudi2_cache_invld_params *inv_params)
5622 {
5623 	int dcore_id, hmmu_id;
5624 
5625 	/* first send all invalidation commands */
5626 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5627 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5628 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5629 				continue;
5630 
5631 			gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5632 		}
5633 	}
5634 
5635 	/* next, poll all invalidations status */
5636 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5637 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5638 			int rc;
5639 
5640 			if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5641 				continue;
5642 
5643 			rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5644 										inv_params);
5645 			if (rc)
5646 				return rc;
5647 		}
5648 	}
5649 
5650 	return 0;
5651 }
5652 
5653 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5654 {
5655 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5656 	struct gaudi2_cache_invld_params invld_params;
5657 	int rc = 0;
5658 
5659 	if (hdev->reset_info.hard_reset_pending)
5660 		return rc;
5661 
5662 	invld_params.range_invalidation = false;
5663 	invld_params.inv_start_val = 1;
5664 
5665 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5666 		invld_params.flags = flags;
5667 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5668 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5669 										&invld_params);
5670 	} else if (flags & MMU_OP_PHYS_PACK) {
5671 		invld_params.flags = 0;
5672 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5673 	}
5674 
5675 	return rc;
5676 }
5677 
5678 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5679 				u32 flags, u32 asid, u64 va, u64 size)
5680 {
5681 	struct gaudi2_cache_invld_params invld_params = {0};
5682 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5683 	u64 start_va, end_va;
5684 	u32 inv_start_val;
5685 	int rc = 0;
5686 
5687 	if (hdev->reset_info.hard_reset_pending)
5688 		return 0;
5689 
5690 	inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5691 			1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5692 			asid << MMU_RANGE_INV_ASID_SHIFT);
5693 	start_va = va;
5694 	end_va = start_va + size;
5695 
5696 	if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5697 		/* As range invalidation does not support zero address we will
5698 		 * do full invalidation in this case
5699 		 */
5700 		if (start_va) {
5701 			invld_params.range_invalidation = true;
5702 			invld_params.start_va = start_va;
5703 			invld_params.end_va = end_va;
5704 			invld_params.inv_start_val = inv_start_val;
5705 			invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5706 		} else {
5707 			invld_params.range_invalidation = false;
5708 			invld_params.inv_start_val = 1;
5709 			invld_params.flags = flags;
5710 		}
5711 
5712 
5713 		gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5714 		rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5715 										&invld_params);
5716 		if (rc)
5717 			return rc;
5718 
5719 	} else if (flags & MMU_OP_PHYS_PACK) {
5720 		invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5721 		invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5722 		invld_params.inv_start_val = inv_start_val;
5723 		invld_params.flags = flags;
5724 		rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5725 	}
5726 
5727 	return rc;
5728 }
5729 
5730 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
5731 {
5732 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5733 	u64 hop0_addr;
5734 	u32 asid, max_asid = prop->max_asid;
5735 	int rc;
5736 
5737 	/* it takes too much time to init all of the ASIDs on palladium */
5738 	if (hdev->pldm)
5739 		max_asid = min((u32) 8, max_asid);
5740 
5741 	for (asid = 0 ; asid < max_asid ; asid++) {
5742 		hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5743 		rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5744 		if (rc) {
5745 			dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5746 			return rc;
5747 		}
5748 	}
5749 
5750 	return 0;
5751 }
5752 
5753 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
5754 {
5755 	u32 status, timeout_usec;
5756 	int rc;
5757 
5758 	if (hdev->pldm || !hdev->pdev)
5759 		timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5760 	else
5761 		timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5762 
5763 	WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5764 
5765 	rc = hl_poll_timeout(
5766 		hdev,
5767 		stlb_base + STLB_SRAM_INIT_OFFSET,
5768 		status,
5769 		!status,
5770 		1000,
5771 		timeout_usec);
5772 
5773 	if (rc)
5774 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5775 
5776 	rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
5777 	if (rc)
5778 		return rc;
5779 
5780 	WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5781 
5782 	rc = hl_poll_timeout(
5783 		hdev,
5784 		stlb_base + STLB_INV_ALL_START_OFFSET,
5785 		status,
5786 		!status,
5787 		1000,
5788 		timeout_usec);
5789 
5790 	if (rc)
5791 		dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5792 
5793 	WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5794 
5795 	return rc;
5796 }
5797 
5798 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5799 {
5800 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5801 	u32 mmu_base, stlb_base;
5802 	int rc;
5803 
5804 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
5805 		return 0;
5806 
5807 	mmu_base = mmPMMU_HBW_MMU_BASE;
5808 	stlb_base = mmPMMU_HBW_STLB_BASE;
5809 
5810 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5811 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
5812 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
5813 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
5814 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
5815 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
5816 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5817 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5818 		PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5819 		PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5820 		PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5821 
5822 	WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
5823 
5824 	if (PAGE_SIZE == SZ_64K) {
5825 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
5826 		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
5827 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
5828 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
5829 			FIELD_PREP(
5830 				DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
5831 				1),
5832 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
5833 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
5834 			DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
5835 	}
5836 
5837 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
5838 
5839 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5840 	if (rc)
5841 		return rc;
5842 
5843 	gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
5844 
5845 	return 0;
5846 }
5847 
5848 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
5849 				int hmmu_id)
5850 {
5851 	struct asic_fixed_properties *prop = &hdev->asic_prop;
5852 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5853 	u32 offset, mmu_base, stlb_base, hw_cap;
5854 	u8 dmmu_seq;
5855 	int rc;
5856 
5857 	dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
5858 	hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
5859 
5860 	/*
5861 	 * return if DMMU is already initialized or if it's not out of
5862 	 * isolation (due to cluster binning)
5863 	 */
5864 	if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
5865 		return 0;
5866 
5867 	offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5868 	mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
5869 	stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
5870 
5871 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
5872 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
5873 
5874 	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
5875 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
5876 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
5877 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
5878 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
5879 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
5880 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
5881 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
5882 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
5883 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
5884 			DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
5885 
5886 	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
5887 			STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
5888 
5889 	WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
5890 
5891 	rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
5892 	if (rc)
5893 		return rc;
5894 
5895 	gaudi2->hw_cap_initialized |= hw_cap;
5896 
5897 	return 0;
5898 }
5899 
5900 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
5901 {
5902 	int rc, dcore_id, hmmu_id;
5903 
5904 	for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5905 		for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
5906 			rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
5907 			if (rc)
5908 				return rc;
5909 		}
5910 
5911 	return 0;
5912 }
5913 
5914 static int gaudi2_mmu_init(struct hl_device *hdev)
5915 {
5916 	int rc;
5917 
5918 	rc = gaudi2_pci_mmu_init(hdev);
5919 	if (rc)
5920 		return rc;
5921 
5922 	rc = gaudi2_hbm_mmu_init(hdev);
5923 	if (rc)
5924 		return rc;
5925 
5926 	return 0;
5927 }
5928 
5929 static int gaudi2_hw_init(struct hl_device *hdev)
5930 {
5931 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
5932 	int rc;
5933 
5934 	/* Let's mark in the H/W that we have reached this point. We check
5935 	 * this value in the reset_before_init function to understand whether
5936 	 * we need to reset the chip before doing H/W init. This register is
5937 	 * cleared by the H/W upon H/W reset
5938 	 */
5939 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
5940 
5941 	/* Perform read from the device to make sure device is up */
5942 	RREG32(mmHW_STATE);
5943 
5944 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
5945 	 * So we set it here and if anyone tries to move it later to
5946 	 * a different address, there will be an error
5947 	 */
5948 	if (hdev->asic_prop.iatu_done_by_fw)
5949 		gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
5950 
5951 	/*
5952 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
5953 	 * base address of dram
5954 	 */
5955 	if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
5956 		dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
5957 		return -EIO;
5958 	}
5959 
5960 	rc = gaudi2_init_cpu(hdev);
5961 	if (rc) {
5962 		dev_err(hdev->dev, "failed to initialize CPU\n");
5963 		return rc;
5964 	}
5965 
5966 	gaudi2_init_scrambler_hbm(hdev);
5967 	gaudi2_init_kdma(hdev);
5968 
5969 	rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
5970 	if (rc) {
5971 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
5972 		return rc;
5973 	}
5974 
5975 	rc = gaudi2->cpucp_info_get(hdev);
5976 	if (rc) {
5977 		dev_err(hdev->dev, "Failed to get cpucp info\n");
5978 		return rc;
5979 	}
5980 
5981 	rc = gaudi2_mmu_init(hdev);
5982 	if (rc)
5983 		return rc;
5984 
5985 	gaudi2_init_pdma(hdev);
5986 	gaudi2_init_edma(hdev);
5987 	gaudi2_init_sm(hdev);
5988 	gaudi2_init_tpc(hdev);
5989 	gaudi2_init_mme(hdev);
5990 	gaudi2_init_rotator(hdev);
5991 	gaudi2_init_dec(hdev);
5992 	gaudi2_enable_timestamp(hdev);
5993 
5994 	rc = gaudi2_coresight_init(hdev);
5995 	if (rc)
5996 		goto disable_queues;
5997 
5998 	rc = gaudi2_enable_msix(hdev);
5999 	if (rc)
6000 		goto disable_queues;
6001 
6002 	/* Perform read from the device to flush all configuration */
6003 	RREG32(mmHW_STATE);
6004 
6005 	return 0;
6006 
6007 disable_queues:
6008 	gaudi2_disable_dma_qmans(hdev);
6009 	gaudi2_disable_mme_qmans(hdev);
6010 	gaudi2_disable_tpc_qmans(hdev);
6011 	gaudi2_disable_rot_qmans(hdev);
6012 	gaudi2_disable_nic_qmans(hdev);
6013 
6014 	gaudi2_disable_timestamp(hdev);
6015 
6016 	return rc;
6017 }
6018 
6019 /**
6020  * gaudi2_send_hard_reset_cmd - common function to handle reset
6021  *
6022  * @hdev: pointer to the habanalabs device structure
6023  *
6024  * This function handles the various possible scenarios for reset.
6025  * It considers if reset is handled by driver\FW and what FW components are loaded
6026  */
6027 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6028 {
6029 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6030 	bool heartbeat_reset, preboot_only, cpu_initialized = false;
6031 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6032 	u32 cpu_boot_status;
6033 
6034 	preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6035 	heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6036 
6037 	/*
6038 	 * Handle corner case where failure was at cpu management app load,
6039 	 * and driver didn't detect any failure while loading the FW,
6040 	 * then at such scenario driver will send only HALT_MACHINE
6041 	 * and no one will respond to this request since FW already back to preboot
6042 	 * and it cannot handle such cmd.
6043 	 * In this case next time the management app loads it'll check on events register
6044 	 * which will still have the halt indication, and will reboot the device.
6045 	 * The solution is to let preboot clear all relevant registers before next boot
6046 	 * once driver send COMMS_RST_DEV.
6047 	 */
6048 	cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6049 
6050 	if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6051 			(cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6052 		cpu_initialized = true;
6053 
6054 	/*
6055 	 * when Linux/Bootfit exist this write to the SP can be interpreted in 2 ways:
6056 	 * 1. FW reset: FW initiate the reset sequence
6057 	 * 2. driver reset: FW will start HALT sequence (the preparations for the
6058 	 *                  reset but not the reset itself as it is not implemented
6059 	 *                  on their part) and LKD will wait to let FW complete the
6060 	 *                  sequence before issuing the reset
6061 	 */
6062 	if (!preboot_only && cpu_initialized) {
6063 		WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6064 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6065 
6066 		msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6067 	}
6068 
6069 	/*
6070 	 * When working with preboot (without Linux/Boot fit) we can
6071 	 * communicate only using the COMMS commands to issue halt/reset.
6072 	 *
6073 	 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6074 	 * attempt to revive the card in the small chance that the f/w has
6075 	 * experienced a watchdog event, which caused it to return back to preboot.
6076 	 * In that case, triggering reset through GIC won't help. We need to
6077 	 * trigger the reset as if Linux wasn't loaded.
6078 	 *
6079 	 * We do it only if the reset cause was HB, because that would be the
6080 	 * indication of such an event.
6081 	 *
6082 	 * In case watchdog hasn't expired but we still got HB, then this won't
6083 	 * do any damage.
6084 	 */
6085 
6086 	if (heartbeat_reset || preboot_only || !cpu_initialized) {
6087 		if (hdev->asic_prop.hard_reset_done_by_fw)
6088 			hl_fw_ask_hard_reset_without_linux(hdev);
6089 		else
6090 			hl_fw_ask_halt_machine_without_linux(hdev);
6091 	}
6092 }
6093 
6094 /**
6095  * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6096  *
6097  * @hdev: pointer to the habanalabs device structure
6098  *
6099  * This function executes hard reset based on if driver/FW should do the reset
6100  */
6101 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6102 {
6103 	if (hdev->asic_prop.hard_reset_done_by_fw) {
6104 		gaudi2_send_hard_reset_cmd(hdev);
6105 		return;
6106 	}
6107 
6108 	/* Set device to handle FLR by H/W as we will put the device
6109 	 * CPU to halt mode
6110 	 */
6111 	WREG32(mmPCIE_AUX_FLR_CTRL,
6112 			(PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6113 
6114 	gaudi2_send_hard_reset_cmd(hdev);
6115 
6116 	WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6117 }
6118 
6119 static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_timeout_us)
6120 {
6121 	int i, rc = 0;
6122 	u32 reg_val;
6123 
6124 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6125 		rc = hl_poll_timeout(
6126 			hdev,
6127 			mmCPU_RST_STATUS_TO_HOST,
6128 			reg_val,
6129 			reg_val == CPU_RST_STATUS_SOFT_RST_DONE,
6130 			1000,
6131 			poll_timeout_us);
6132 
6133 	if (rc)
6134 		dev_err(hdev->dev, "Timeout while waiting for FW to complete soft reset (0x%x)\n",
6135 				reg_val);
6136 	return rc;
6137 }
6138 
6139 /**
6140  * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6141  *
6142  * @hdev: pointer to the habanalabs device structure
6143  * @driver_performs_reset: true if driver should perform reset instead of f/w.
6144  * @poll_timeout_us: time to wait for response from f/w.
6145  *
6146  * This function executes soft reset based on if driver/FW should do the reset
6147  */
6148 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6149 						u32 poll_timeout_us)
6150 {
6151 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6152 	int rc = 0;
6153 
6154 	if (!driver_performs_reset) {
6155 		if (hl_is_fw_sw_ver_below(hdev, 1, 10)) {
6156 			/* set SP to indicate reset request sent to FW */
6157 			if (dyn_regs->cpu_rst_status)
6158 				WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA);
6159 			else
6160 				WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA);
6161 			WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq),
6162 				gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id);
6163 
6164 			/* wait for f/w response */
6165 			rc = gaudi2_get_soft_rst_done_indication(hdev, poll_timeout_us);
6166 		} else {
6167 			rc = hl_fw_send_soft_reset(hdev);
6168 		}
6169 		return rc;
6170 	}
6171 
6172 	/* Block access to engines, QMANs and SM during reset, these
6173 	 * RRs will be reconfigured after soft reset.
6174 	 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6175 	 */
6176 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6177 					mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6178 
6179 	gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6180 				mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6181 				mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6182 
6183 	WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6184 	return 0;
6185 }
6186 
6187 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6188 {
6189 	int i, rc = 0;
6190 	u32 reg_val;
6191 
6192 	/* We poll the BTM done indication multiple times after reset due to
6193 	 * a HW errata 'GAUDI2_0300'
6194 	 */
6195 	for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6196 		rc = hl_poll_timeout(
6197 			hdev,
6198 			mmPSOC_GLOBAL_CONF_BTM_FSM,
6199 			reg_val,
6200 			reg_val == 0,
6201 			1000,
6202 			poll_timeout_us);
6203 
6204 	if (rc)
6205 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6206 }
6207 
6208 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6209 {
6210 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6211 	u32 poll_timeout_us, reset_sleep_ms;
6212 	bool driver_performs_reset = false;
6213 	int rc;
6214 
6215 	if (hdev->pldm) {
6216 		reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6217 						GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6218 		poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6219 	} else {
6220 		reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6221 		poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6222 	}
6223 
6224 	if (fw_reset)
6225 		goto skip_reset;
6226 
6227 	gaudi2_reset_arcs(hdev);
6228 
6229 	if (hard_reset) {
6230 		driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6231 		gaudi2_execute_hard_reset(hdev);
6232 	} else {
6233 		/*
6234 		 * As we have to support also work with preboot only (which does not supports
6235 		 * soft reset) we have to make sure that security is disabled before letting driver
6236 		 * do the reset. user shall control the BFE flags to avoid asking soft reset in
6237 		 * secured device with preboot only.
6238 		 */
6239 		driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6240 							!hdev->asic_prop.fw_security_enabled);
6241 		rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6242 		if (rc)
6243 			return rc;
6244 	}
6245 
6246 skip_reset:
6247 	if (driver_performs_reset || hard_reset) {
6248 		/*
6249 		 * Instead of waiting for BTM indication we should wait for preboot ready:
6250 		 * Consider the below scenario:
6251 		 * 1. FW update is being triggered
6252 		 *        - setting the dirty bit
6253 		 * 2. hard reset will be triggered due to the dirty bit
6254 		 * 3. FW initiates the reset:
6255 		 *        - dirty bit cleared
6256 		 *        - BTM indication cleared
6257 		 *        - preboot ready indication cleared
6258 		 * 4. during hard reset:
6259 		 *        - BTM indication will be set
6260 		 *        - BIST test performed and another reset triggered
6261 		 * 5. only after this reset the preboot will set the preboot ready
6262 		 *
6263 		 * when polling on BTM indication alone we can lose sync with FW while trying to
6264 		 * communicate with FW that is during reset.
6265 		 * to overcome this we will always wait to preboot ready indication
6266 		 */
6267 
6268 		/* without this sleep reset will not work */
6269 		msleep(reset_sleep_ms);
6270 
6271 		if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6272 			hl_fw_wait_preboot_ready(hdev);
6273 		else
6274 			gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6275 	}
6276 
6277 	if (!gaudi2)
6278 		return 0;
6279 
6280 	gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6281 	gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6282 
6283 	/*
6284 	 * Clear NIC capability mask in order for driver to re-configure
6285 	 * NIC QMANs. NIC ports will not be re-configured during soft
6286 	 * reset as we call gaudi2_nic_init only during hard reset
6287 	 */
6288 	gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6289 
6290 	if (hard_reset) {
6291 		gaudi2->hw_cap_initialized &=
6292 			~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6293 			HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6294 			HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6295 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6296 			HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6297 
6298 		memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6299 	} else {
6300 		gaudi2->hw_cap_initialized &=
6301 			~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6302 			HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6303 			HW_CAP_ROT_MASK);
6304 	}
6305 	return 0;
6306 }
6307 
6308 static int gaudi2_suspend(struct hl_device *hdev)
6309 {
6310 	int rc;
6311 
6312 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6313 	if (rc)
6314 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
6315 
6316 	return rc;
6317 }
6318 
6319 static int gaudi2_resume(struct hl_device *hdev)
6320 {
6321 	return gaudi2_init_iatu(hdev);
6322 }
6323 
6324 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6325 		void *cpu_addr, dma_addr_t dma_addr, size_t size)
6326 {
6327 	int rc;
6328 
6329 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6330 			VM_DONTCOPY | VM_NORESERVE);
6331 
6332 #ifdef _HAS_DMA_MMAP_COHERENT
6333 
6334 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6335 	if (rc)
6336 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6337 
6338 #else
6339 
6340 	rc = remap_pfn_range(vma, vma->vm_start,
6341 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6342 				size, vma->vm_page_prot);
6343 	if (rc)
6344 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6345 
6346 #endif
6347 
6348 	return rc;
6349 }
6350 
6351 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6352 {
6353 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6354 	u64 hw_cap_mask = 0;
6355 	u64 hw_tpc_cap_bit = 0;
6356 	u64 hw_nic_cap_bit = 0;
6357 	u64 hw_test_cap_bit = 0;
6358 
6359 	switch (hw_queue_id) {
6360 	case GAUDI2_QUEUE_ID_PDMA_0_0:
6361 	case GAUDI2_QUEUE_ID_PDMA_0_1:
6362 	case GAUDI2_QUEUE_ID_PDMA_1_0:
6363 		hw_cap_mask = HW_CAP_PDMA_MASK;
6364 		break;
6365 	case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6366 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6367 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6368 		break;
6369 	case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6370 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6371 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6372 		break;
6373 	case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6374 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6375 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6376 		break;
6377 	case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6378 		hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6379 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6380 		break;
6381 
6382 	case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6383 		hw_test_cap_bit = HW_CAP_MME_SHIFT;
6384 		break;
6385 
6386 	case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6387 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6388 		break;
6389 
6390 	case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6391 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6392 		break;
6393 
6394 	case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6395 		hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6396 		break;
6397 
6398 	case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6399 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6400 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6401 
6402 		/* special case where cap bit refers to the first queue id */
6403 		if (!hw_tpc_cap_bit)
6404 			return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6405 		break;
6406 
6407 	case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6408 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6409 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6410 		break;
6411 
6412 	case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6413 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6414 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6415 		break;
6416 
6417 	case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6418 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6419 			((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6420 		break;
6421 
6422 	case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6423 		hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6424 		break;
6425 
6426 	case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6427 		hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6428 		break;
6429 
6430 	case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6431 		hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6432 
6433 		/* special case where cap bit refers to the first queue id */
6434 		if (!hw_nic_cap_bit)
6435 			return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6436 		break;
6437 
6438 	case GAUDI2_QUEUE_ID_CPU_PQ:
6439 		return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6440 
6441 	default:
6442 		return false;
6443 	}
6444 
6445 	if (hw_tpc_cap_bit)
6446 		return  !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6447 
6448 	if (hw_nic_cap_bit)
6449 		return  !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6450 
6451 	if (hw_test_cap_bit)
6452 		hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6453 
6454 	return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6455 }
6456 
6457 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6458 {
6459 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6460 
6461 	switch (arc_id) {
6462 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6463 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6464 		return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6465 
6466 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6467 		return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6468 
6469 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6470 		return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6471 
6472 	default:
6473 		return false;
6474 	}
6475 }
6476 
6477 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6478 {
6479 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6480 
6481 	switch (arc_id) {
6482 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6483 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6484 		gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6485 		break;
6486 
6487 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6488 		gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6489 		break;
6490 
6491 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6492 		gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6493 		break;
6494 
6495 	default:
6496 		return;
6497 	}
6498 }
6499 
6500 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6501 {
6502 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6503 
6504 	switch (arc_id) {
6505 	case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6506 	case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6507 		gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6508 		break;
6509 
6510 	case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6511 		gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6512 		break;
6513 
6514 	case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6515 		gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6516 		break;
6517 
6518 	default:
6519 		return;
6520 	}
6521 }
6522 
6523 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6524 {
6525 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6526 	u32 pq_offset, reg_base, db_reg_offset, db_value;
6527 
6528 	if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6529 		/*
6530 		 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6531 		 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6532 		 * number.
6533 		 */
6534 		pq_offset = (hw_queue_id & 0x3) * 4;
6535 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6536 		db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6537 	} else {
6538 		db_reg_offset = mmCPU_IF_PF_PQ_PI;
6539 	}
6540 
6541 	db_value = pi;
6542 
6543 	/* ring the doorbell */
6544 	WREG32(db_reg_offset, db_value);
6545 
6546 	if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6547 		/* make sure device CPU will read latest data from host */
6548 		mb();
6549 		WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6550 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6551 	}
6552 }
6553 
6554 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6555 {
6556 	__le64 *pbd = (__le64 *) bd;
6557 
6558 	/* The QMANs are on the host memory so a simple copy suffice */
6559 	pqe[0] = pbd[0];
6560 	pqe[1] = pbd[1];
6561 }
6562 
6563 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6564 				dma_addr_t *dma_handle, gfp_t flags)
6565 {
6566 	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6567 }
6568 
6569 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6570 				void *cpu_addr, dma_addr_t dma_handle)
6571 {
6572 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6573 }
6574 
6575 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6576 				u32 timeout, u64 *result)
6577 {
6578 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6579 
6580 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6581 		if (result)
6582 			*result = 0;
6583 		return 0;
6584 	}
6585 
6586 	if (!timeout)
6587 		timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6588 
6589 	return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6590 }
6591 
6592 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6593 				gfp_t mem_flags, dma_addr_t *dma_handle)
6594 {
6595 	if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6596 		return NULL;
6597 
6598 	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6599 }
6600 
6601 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6602 {
6603 	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6604 }
6605 
6606 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6607 						dma_addr_t *dma_handle)
6608 {
6609 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6610 }
6611 
6612 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6613 {
6614 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6615 }
6616 
6617 static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len,
6618 					enum dma_data_direction dir)
6619 {
6620 	dma_addr_t dma_addr;
6621 
6622 	dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir);
6623 	if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr)))
6624 		return 0;
6625 
6626 	return dma_addr;
6627 }
6628 
6629 static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len,
6630 					enum dma_data_direction dir)
6631 {
6632 	dma_unmap_single(&hdev->pdev->dev, addr, len, dir);
6633 }
6634 
6635 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6636 {
6637 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6638 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6639 
6640 	if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6641 		dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6642 		return -EINVAL;
6643 	}
6644 
6645 	/* Just check if CB address is valid */
6646 
6647 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6648 					parser->user_cb_size,
6649 					asic_prop->sram_user_base_address,
6650 					asic_prop->sram_end_address))
6651 		return 0;
6652 
6653 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6654 					parser->user_cb_size,
6655 					asic_prop->dram_user_base_address,
6656 					asic_prop->dram_end_address))
6657 		return 0;
6658 
6659 	if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6660 		hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6661 						parser->user_cb_size,
6662 						asic_prop->dmmu.start_addr,
6663 						asic_prop->dmmu.end_addr))
6664 		return 0;
6665 
6666 	if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6667 		if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6668 					parser->user_cb_size,
6669 					asic_prop->pmmu.start_addr,
6670 					asic_prop->pmmu.end_addr) ||
6671 			hl_mem_area_inside_range(
6672 					(u64) (uintptr_t) parser->user_cb,
6673 					parser->user_cb_size,
6674 					asic_prop->pmmu_huge.start_addr,
6675 					asic_prop->pmmu_huge.end_addr))
6676 			return 0;
6677 
6678 	} else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6679 		if (!hdev->pdev)
6680 			return 0;
6681 
6682 		if (!device_iommu_mapped(&hdev->pdev->dev))
6683 			return 0;
6684 	}
6685 
6686 	dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6687 		parser->user_cb, parser->user_cb_size);
6688 
6689 	return -EFAULT;
6690 }
6691 
6692 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6693 {
6694 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6695 
6696 	if (!parser->is_kernel_allocated_cb)
6697 		return gaudi2_validate_cb_address(hdev, parser);
6698 
6699 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6700 		dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6701 		return -EINVAL;
6702 	}
6703 
6704 	return 0;
6705 }
6706 
6707 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6708 {
6709 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6710 
6711 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6712 		return 0;
6713 
6714 	return hl_fw_send_heartbeat(hdev);
6715 }
6716 
6717 /* This is an internal helper function, used to update the KDMA mmu props.
6718  * Should be called with a proper kdma lock.
6719  */
6720 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6721 					   bool mmu_bypass, u32 asid)
6722 {
6723 	u32 rw_asid, rw_mmu_bp;
6724 
6725 	rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6726 		      (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6727 
6728 	rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6729 			(!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6730 
6731 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6732 	WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6733 }
6734 
6735 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6736 						u32 mon_payload, u32 sync_value)
6737 {
6738 	u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6739 	u8 mask;
6740 
6741 	sob_offset = sob_id * 4;
6742 	mon_offset = mon_id * 4;
6743 
6744 	/* Reset the SOB value */
6745 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6746 
6747 	/* Configure this address with CQ_ID 0 because CQ_EN is set */
6748 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6749 
6750 	/* Configure this address with CS index because CQ_EN is set */
6751 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6752 
6753 	sync_group_id = sob_id / 8;
6754 	mask = ~(1 << (sob_id & 0x7));
6755 	mode = 1; /* comparison mode is "equal to" */
6756 
6757 	mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6758 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6759 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6760 	mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6761 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6762 }
6763 
6764 /* This is an internal helper function used by gaudi2_send_job_to_kdma only */
6765 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6766 					u64 src_addr, u64 dst_addr,
6767 					u32 size, bool is_memset)
6768 {
6769 	u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6770 	struct hl_cq_entry *cq_base;
6771 	struct hl_cq *cq;
6772 	u64 comp_addr;
6773 	int rc;
6774 
6775 	gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6776 				GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6777 				GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6778 
6779 	comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6780 			(GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6781 
6782 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6783 			FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6784 
6785 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6786 	WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6787 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6788 	WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6789 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6790 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6791 	WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6792 	WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6793 
6794 	commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6795 				FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6796 
6797 	if (is_memset)
6798 		commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6799 
6800 	WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6801 
6802 	/* Wait for completion */
6803 	cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6804 	cq_base = cq->kernel_address;
6805 	polling_addr = (u32 *)&cq_base[cq->ci];
6806 
6807 	if (hdev->pldm)
6808 		/* for each 1MB 20 second of timeout */
6809 		timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6810 	else
6811 		timeout = KDMA_TIMEOUT_USEC;
6812 
6813 	/* Polling */
6814 	rc = hl_poll_timeout_memory(
6815 			hdev,
6816 			polling_addr,
6817 			status,
6818 			(status == 1),
6819 			1000,
6820 			timeout,
6821 			true);
6822 
6823 	*polling_addr = 0;
6824 
6825 	if (rc) {
6826 		dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6827 		WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6828 		return rc;
6829 	}
6830 
6831 	cq->ci = hl_cq_inc_ptr(cq->ci);
6832 
6833 	return 0;
6834 }
6835 
6836 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6837 {
6838 	u32 i;
6839 
6840 	for (i = 0 ; i < size ; i += sizeof(u32))
6841 		WREG32(addr + i, val);
6842 }
6843 
6844 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6845 {
6846 	u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6847 
6848 	if (enable) {
6849 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6850 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6851 	} else {
6852 		WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6853 		WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6854 	}
6855 }
6856 
6857 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6858 {
6859 	return hdev->asic_prop.first_available_user_sob[0] +
6860 				hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6861 }
6862 
6863 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
6864 {
6865 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6866 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6867 
6868 	/* Reset the SOB value */
6869 	WREG32(sob_addr, 0);
6870 }
6871 
6872 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
6873 					    struct gaudi2_queues_test_info *msg_info)
6874 {
6875 	u32 sob_offset =  gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6876 	u32 tmp, sob_base = 1;
6877 	struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
6878 	size_t pkt_size = sizeof(struct packet_msg_short);
6879 	int rc;
6880 
6881 	tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
6882 		(1 << GAUDI2_PKT_CTL_EB_SHIFT) |
6883 		(1 << GAUDI2_PKT_CTL_MB_SHIFT) |
6884 		(sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
6885 		(sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
6886 
6887 	msg_short_pkt->value = cpu_to_le32(sob_val);
6888 	msg_short_pkt->ctl = cpu_to_le32(tmp);
6889 
6890 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
6891 	if (rc)
6892 		dev_err(hdev->dev,
6893 			"Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
6894 
6895 	return rc;
6896 }
6897 
6898 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
6899 {
6900 	u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
6901 	u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
6902 	u32 timeout_usec, tmp;
6903 	int rc;
6904 
6905 	if (hdev->pldm)
6906 		timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
6907 	else
6908 		timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
6909 
6910 	rc = hl_poll_timeout(
6911 			hdev,
6912 			sob_addr,
6913 			tmp,
6914 			(tmp == sob_val),
6915 			1000,
6916 			timeout_usec);
6917 
6918 	if (rc == -ETIMEDOUT) {
6919 		dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
6920 			hw_queue_id, tmp);
6921 		rc = -EIO;
6922 	}
6923 
6924 	return rc;
6925 }
6926 
6927 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
6928 {
6929 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6930 
6931 	/*
6932 	 * check capability here as send_cpu_message() won't update the result
6933 	 * value if no capability
6934 	 */
6935 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6936 		return 0;
6937 
6938 	return hl_fw_test_cpu_queue(hdev);
6939 }
6940 
6941 static int gaudi2_test_queues(struct hl_device *hdev)
6942 {
6943 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6944 	struct gaudi2_queues_test_info *msg_info;
6945 	u32 sob_val = 0x5a5a;
6946 	int i, rc;
6947 
6948 	/* send test message on all enabled Qs */
6949 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6950 		if (!gaudi2_is_queue_enabled(hdev, i))
6951 			continue;
6952 
6953 		msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
6954 		gaudi2_qman_set_test_mode(hdev, i, true);
6955 		gaudi2_test_queue_clear(hdev, i);
6956 		rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
6957 		if (rc)
6958 			goto done;
6959 	}
6960 
6961 	rc = gaudi2_test_cpu_queue(hdev);
6962 	if (rc)
6963 		goto done;
6964 
6965 	/* verify that all messages were processed */
6966 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
6967 		if (!gaudi2_is_queue_enabled(hdev, i))
6968 			continue;
6969 
6970 		rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
6971 		if (rc)
6972 			/* chip is not usable, no need for cleanups, just bail-out with error */
6973 			goto done;
6974 
6975 		gaudi2_test_queue_clear(hdev, i);
6976 		gaudi2_qman_set_test_mode(hdev, i, false);
6977 	}
6978 
6979 done:
6980 	return rc;
6981 }
6982 
6983 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
6984 {
6985 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
6986 	size_t irq_arr_size;
6987 	int rc;
6988 
6989 	gaudi2_init_arcs(hdev);
6990 
6991 	rc = gaudi2_scrub_arcs_dccm(hdev);
6992 	if (rc) {
6993 		dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
6994 		return rc;
6995 	}
6996 
6997 	gaudi2_init_security(hdev);
6998 
6999 	/* Unmask all IRQs since some could have been received during the soft reset */
7000 	irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7001 	return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7002 }
7003 
7004 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7005 		struct engines_data *e)
7006 {
7007 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7008 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7009 	unsigned long *mask = (unsigned long *) mask_arr;
7010 	const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7011 	bool is_idle = true, is_eng_idle;
7012 	int engine_idx, i, j;
7013 	u64 offset;
7014 
7015 	if (e)
7016 		hl_engine_data_sprintf(e,
7017 			"\nCORE  EDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7018 			"----  ----  -------  ------------  -------------  -------------\n");
7019 
7020 	for (i = 0; i < NUM_OF_DCORES; i++) {
7021 		for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7022 			int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7023 
7024 			if (!(prop->edma_enabled_mask & BIT(seq)))
7025 				continue;
7026 
7027 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7028 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7029 			offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7030 
7031 			dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7032 			dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7033 
7034 			qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7035 			qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7036 			qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7037 
7038 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7039 					IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7040 			is_idle &= is_eng_idle;
7041 
7042 			if (mask && !is_eng_idle)
7043 				set_bit(engine_idx, mask);
7044 
7045 			if (e)
7046 				hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7047 							qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7048 		}
7049 	}
7050 
7051 	return is_idle;
7052 }
7053 
7054 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7055 		struct engines_data *e)
7056 {
7057 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7058 	unsigned long *mask = (unsigned long *) mask_arr;
7059 	const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7060 	bool is_idle = true, is_eng_idle;
7061 	int engine_idx, i;
7062 	u64 offset;
7063 
7064 	if (e)
7065 		hl_engine_data_sprintf(e,
7066 					"\nPDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0  DMA_CORE_STS1\n"
7067 					"----  -------  ------------  -------------  -------------\n");
7068 
7069 	for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7070 		engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7071 		offset = i * PDMA_OFFSET;
7072 		dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7073 		dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7074 
7075 		qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7076 		qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7077 		qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7078 
7079 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7080 				IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7081 		is_idle &= is_eng_idle;
7082 
7083 		if (mask && !is_eng_idle)
7084 			set_bit(engine_idx, mask);
7085 
7086 		if (e)
7087 			hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7088 						qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7089 	}
7090 
7091 	return is_idle;
7092 }
7093 
7094 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7095 		struct engines_data *e)
7096 {
7097 	unsigned long *mask = (unsigned long *) mask_arr;
7098 	const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7099 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7100 	bool is_idle = true, is_eng_idle;
7101 	int engine_idx, i;
7102 	u64 offset = 0;
7103 
7104 	/* NIC, twelve macros in Full chip */
7105 	if (e && hdev->nic_ports_mask)
7106 		hl_engine_data_sprintf(e,
7107 					"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7108 					"---  -------  ------------  ----------\n");
7109 
7110 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7111 		if (!(i & 1))
7112 			offset = i / 2 * NIC_OFFSET;
7113 		else
7114 			offset += NIC_QM_OFFSET;
7115 
7116 		if (!(hdev->nic_ports_mask & BIT(i)))
7117 			continue;
7118 
7119 		engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7120 
7121 
7122 		qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7123 		qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7124 		qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7125 
7126 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7127 		is_idle &= is_eng_idle;
7128 
7129 		if (mask && !is_eng_idle)
7130 			set_bit(engine_idx, mask);
7131 
7132 		if (e)
7133 			hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7134 						qm_glbl_sts0, qm_cgm_sts);
7135 	}
7136 
7137 	return is_idle;
7138 }
7139 
7140 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7141 		struct engines_data *e)
7142 {
7143 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7144 	unsigned long *mask = (unsigned long *) mask_arr;
7145 	const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7146 	bool is_idle = true, is_eng_idle;
7147 	int engine_idx, i;
7148 	u64 offset;
7149 
7150 	if (e)
7151 		hl_engine_data_sprintf(e,
7152 					"\nMME  Stub  is_idle  QM_GLBL_STS0  MME_ARCH_STATUS\n"
7153 					"---  ----  -------  ------------  ---------------\n");
7154 	/* MME, one per Dcore */
7155 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7156 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7157 		offset = i * DCORE_OFFSET;
7158 
7159 		qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7160 		qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7161 		qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7162 
7163 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7164 		is_idle &= is_eng_idle;
7165 
7166 		mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7167 		is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7168 		is_idle &= is_eng_idle;
7169 
7170 		if (e)
7171 			hl_engine_data_sprintf(e, mme_fmt, i, "N",
7172 				is_eng_idle ? "Y" : "N",
7173 				qm_glbl_sts0,
7174 				mme_arch_sts);
7175 
7176 		if (mask && !is_eng_idle)
7177 			set_bit(engine_idx, mask);
7178 	}
7179 
7180 	return is_idle;
7181 }
7182 
7183 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7184 					struct iterate_module_ctx *ctx)
7185 {
7186 	struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7187 	u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7188 	bool is_eng_idle;
7189 	int engine_idx;
7190 
7191 	if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7192 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7193 	else
7194 		engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7195 				dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7196 
7197 	tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7198 	qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7199 	qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7200 	qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7201 
7202 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7203 						IS_TPC_IDLE(tpc_cfg_sts);
7204 	*(idle_data->is_idle) &= is_eng_idle;
7205 
7206 	if (idle_data->mask && !is_eng_idle)
7207 		set_bit(engine_idx, idle_data->mask);
7208 
7209 	if (idle_data->e)
7210 		hl_engine_data_sprintf(idle_data->e,
7211 					idle_data->tpc_fmt, dcore, inst,
7212 					is_eng_idle ? "Y" : "N",
7213 					qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7214 }
7215 
7216 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7217 		struct engines_data *e)
7218 {
7219 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7220 	unsigned long *mask = (unsigned long *) mask_arr;
7221 	bool is_idle = true;
7222 
7223 	struct gaudi2_tpc_idle_data tpc_idle_data = {
7224 		.tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7225 		.e = e,
7226 		.mask = mask,
7227 		.is_idle = &is_idle,
7228 	};
7229 	struct iterate_module_ctx tpc_iter = {
7230 		.fn = &gaudi2_is_tpc_engine_idle,
7231 		.data = &tpc_idle_data,
7232 	};
7233 
7234 	if (e && prop->tpc_enabled_mask)
7235 		hl_engine_data_sprintf(e,
7236 			"\nCORE  TPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  STATUS\n"
7237 			"----  ---  -------  ------------  ----------  ------\n");
7238 
7239 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7240 
7241 	return *tpc_idle_data.is_idle;
7242 }
7243 
7244 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7245 		struct engines_data *e)
7246 {
7247 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7248 	unsigned long *mask = (unsigned long *) mask_arr;
7249 	const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7250 	const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7251 	bool is_idle = true, is_eng_idle;
7252 	u32 dec_swreg15, dec_enabled_bit;
7253 	int engine_idx, i, j;
7254 	u64 offset;
7255 
7256 	/* Decoders, two each Dcore and two shared PCIe decoders */
7257 	if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7258 		hl_engine_data_sprintf(e,
7259 			"\nCORE  DEC  is_idle  VSI_CMD_SWREG15\n"
7260 			"----  ---  -------  ---------------\n");
7261 
7262 	for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7263 		for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7264 			dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7265 			if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7266 				continue;
7267 
7268 			engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7269 					i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7270 			offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7271 
7272 			dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7273 			is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7274 			is_idle &= is_eng_idle;
7275 
7276 			if (mask && !is_eng_idle)
7277 				set_bit(engine_idx, mask);
7278 
7279 			if (e)
7280 				hl_engine_data_sprintf(e, dec_fmt, i, j,
7281 							is_eng_idle ? "Y" : "N", dec_swreg15);
7282 		}
7283 	}
7284 
7285 	if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7286 		hl_engine_data_sprintf(e,
7287 			"\nPCIe DEC  is_idle  VSI_CMD_SWREG15\n"
7288 			"--------  -------  ---------------\n");
7289 
7290 	/* Check shared(PCIe) decoders */
7291 	for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7292 		dec_enabled_bit = PCIE_DEC_SHIFT + i;
7293 		if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7294 			continue;
7295 
7296 		engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7297 		offset = i * DCORE_DEC_OFFSET;
7298 		dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7299 		is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7300 		is_idle &= is_eng_idle;
7301 
7302 		if (mask && !is_eng_idle)
7303 			set_bit(engine_idx, mask);
7304 
7305 		if (e)
7306 			hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7307 						is_eng_idle ? "Y" : "N", dec_swreg15);
7308 	}
7309 
7310 	return is_idle;
7311 }
7312 
7313 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7314 		struct engines_data *e)
7315 {
7316 	const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7317 	unsigned long *mask = (unsigned long *) mask_arr;
7318 	u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7319 	bool is_idle = true, is_eng_idle;
7320 	int engine_idx, i;
7321 	u64 offset;
7322 
7323 	if (e)
7324 		hl_engine_data_sprintf(e,
7325 			"\nCORE  ROT  is_idle  QM_GLBL_STS0  QM_GLBL_STS1  QM_CGM_STS\n"
7326 			"----  ---  -------  ------------  ------------  ----------\n");
7327 
7328 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7329 		engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7330 
7331 		offset = i * ROT_OFFSET;
7332 
7333 		qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7334 		qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7335 		qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7336 
7337 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7338 		is_idle &= is_eng_idle;
7339 
7340 		if (mask && !is_eng_idle)
7341 			set_bit(engine_idx, mask);
7342 
7343 		if (e)
7344 			hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7345 						qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7346 	}
7347 
7348 	return is_idle;
7349 }
7350 
7351 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7352 					struct engines_data *e)
7353 {
7354 	bool is_idle = true;
7355 
7356 	is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7357 	is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7358 	is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7359 	is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7360 	is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7361 	is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7362 	is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7363 
7364 	return is_idle;
7365 }
7366 
7367 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7368 	__acquires(&gaudi2->hw_queues_lock)
7369 {
7370 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7371 
7372 	spin_lock(&gaudi2->hw_queues_lock);
7373 }
7374 
7375 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7376 	__releases(&gaudi2->hw_queues_lock)
7377 {
7378 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7379 
7380 	spin_unlock(&gaudi2->hw_queues_lock);
7381 }
7382 
7383 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7384 {
7385 	return hdev->pdev->device;
7386 }
7387 
7388 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7389 {
7390 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7391 
7392 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7393 		return 0;
7394 
7395 	return hl_fw_get_eeprom_data(hdev, data, max_size);
7396 }
7397 
7398 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7399 {
7400 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7401 }
7402 
7403 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7404 {
7405 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7406 
7407 	if (aggregate) {
7408 		*size = (u32) sizeof(gaudi2->events_stat_aggregate);
7409 		return gaudi2->events_stat_aggregate;
7410 	}
7411 
7412 	*size = (u32) sizeof(gaudi2->events_stat);
7413 	return gaudi2->events_stat;
7414 }
7415 
7416 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7417 				int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7418 {
7419 	u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7420 			dcore_vdec_id + DCORE_OFFSET * dcore_id;
7421 
7422 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7423 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7424 
7425 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7426 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7427 
7428 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7429 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7430 
7431 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7432 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7433 
7434 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7435 	WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7436 }
7437 
7438 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7439 {
7440 	u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7441 			(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7442 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7443 	u32 dcore_offset = dcore_id * DCORE_OFFSET;
7444 	u32 vdec_id, i, ports_offset, reg_val;
7445 	u8 edma_seq_base;
7446 
7447 	/* EDMA */
7448 	edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7449 	if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7450 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7451 		WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7452 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7453 		WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7454 	}
7455 
7456 	if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7457 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7458 		WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7459 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7460 		WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7461 	}
7462 
7463 	/* Sync Mngr */
7464 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7465 	/*
7466 	 * Sync Mngrs on dcores 1 - 3 are exposed to user, so must use user ASID
7467 	 * for any access type
7468 	 */
7469 	if (dcore_id > 0) {
7470 		reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7471 			  (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7472 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7473 		WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7474 	}
7475 
7476 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7477 	WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7478 
7479 	for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7480 		ports_offset = i * DCORE_MME_SBTE_OFFSET;
7481 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7482 				dcore_offset + ports_offset, 0);
7483 		WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7484 				dcore_offset + ports_offset, rw_asid);
7485 	}
7486 
7487 	for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7488 		ports_offset = i * DCORE_MME_WB_OFFSET;
7489 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7490 				dcore_offset + ports_offset, 0);
7491 		WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7492 				dcore_offset + ports_offset, rw_asid);
7493 	}
7494 
7495 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7496 	WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7497 
7498 	/*
7499 	 * Decoders
7500 	 */
7501 	for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7502 		if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7503 			gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7504 	}
7505 }
7506 
7507 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7508 				int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7509 {
7510 	u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7511 
7512 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7513 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7514 
7515 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7516 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7517 
7518 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7519 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7520 
7521 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7522 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7523 
7524 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7525 	WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7526 }
7527 
7528 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7529 							u32 rw_asid, u32 rw_mmu_bp)
7530 {
7531 	u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7532 
7533 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7534 	WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7535 }
7536 
7537 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7538 {
7539 	u32 reg_base, reg_offset, reg_val = 0;
7540 
7541 	reg_base = gaudi2_arc_blocks_bases[cpu_id];
7542 
7543 	/* Enable MMU and configure asid for all relevant ARC regions */
7544 	reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7545 	reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7546 
7547 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7548 	WREG32(reg_base + reg_offset, reg_val);
7549 
7550 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7551 	WREG32(reg_base + reg_offset, reg_val);
7552 
7553 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7554 	WREG32(reg_base + reg_offset, reg_val);
7555 
7556 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7557 	WREG32(reg_base + reg_offset, reg_val);
7558 
7559 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7560 	WREG32(reg_base + reg_offset, reg_val);
7561 
7562 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7563 	WREG32(reg_base + reg_offset, reg_val);
7564 
7565 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7566 	WREG32(reg_base + reg_offset, reg_val);
7567 
7568 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7569 	WREG32(reg_base + reg_offset, reg_val);
7570 
7571 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7572 	WREG32(reg_base + reg_offset, reg_val);
7573 
7574 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7575 	WREG32(reg_base + reg_offset, reg_val);
7576 
7577 	reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7578 	WREG32(reg_base + reg_offset, reg_val);
7579 }
7580 
7581 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7582 {
7583 	int i;
7584 
7585 	if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7586 		return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7587 
7588 	for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7589 		gaudi2_arc_mmu_prepare(hdev, i, asid);
7590 
7591 	for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7592 		if (!gaudi2_is_queue_enabled(hdev, i))
7593 			continue;
7594 
7595 		gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7596 	}
7597 
7598 	return 0;
7599 }
7600 
7601 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7602 {
7603 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7604 	u32 rw_asid, offset;
7605 	int rc, i;
7606 
7607 	rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7608 			FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7609 
7610 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7611 	WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7612 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7613 	WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7614 
7615 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7616 	WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7617 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7618 	WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7619 
7620 	/* ROT */
7621 	for (i = 0 ; i < NUM_OF_ROT ; i++) {
7622 		offset = i * ROT_OFFSET;
7623 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7624 		WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7625 		RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7626 		RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7627 		RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7628 	}
7629 
7630 	/* Shared Decoders are the last bits in the decoders mask */
7631 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7632 		gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7633 
7634 	if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7635 		gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7636 
7637 	/* arc farm arc dup eng */
7638 	for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7639 		gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7640 
7641 	rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7642 	if (rc)
7643 		return rc;
7644 
7645 	return 0;
7646 }
7647 
7648 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst,	u32 offset,
7649 					struct iterate_module_ctx *ctx)
7650 {
7651 	struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7652 
7653 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7654 	WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7655 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7656 	WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7657 }
7658 
7659 /* zero the MMUBP and set the ASID */
7660 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7661 {
7662 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
7663 	struct gaudi2_tpc_mmu_data tpc_mmu_data;
7664 	struct iterate_module_ctx tpc_iter = {
7665 		.fn = &gaudi2_tpc_mmu_prepare,
7666 		.data = &tpc_mmu_data,
7667 	};
7668 	int rc, i;
7669 
7670 	if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7671 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
7672 		return -EINVAL;
7673 	}
7674 
7675 	if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7676 		return 0;
7677 
7678 	rc = gaudi2_mmu_shared_prepare(hdev, asid);
7679 	if (rc)
7680 		return rc;
7681 
7682 	/* configure DCORE MMUs */
7683 	tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7684 				(asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7685 	gaudi2_iterate_tpcs(hdev, &tpc_iter);
7686 	for (i = 0 ; i < NUM_OF_DCORES ; i++)
7687 		gaudi2_mmu_dcore_prepare(hdev, i, asid);
7688 
7689 	return 0;
7690 }
7691 
7692 static inline bool is_info_event(u32 event)
7693 {
7694 	switch (event) {
7695 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7696 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7697 
7698 	/* return in case of NIC status event - these events are received periodically and not as
7699 	 * an indication to an error.
7700 	 */
7701 	case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7702 		return true;
7703 	default:
7704 		return false;
7705 	}
7706 }
7707 
7708 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7709 			bool ratelimited, const char *fmt, ...)
7710 {
7711 	struct va_format vaf;
7712 	va_list args;
7713 
7714 	va_start(args, fmt);
7715 	vaf.fmt = fmt;
7716 	vaf.va = &args;
7717 
7718 	if (ratelimited)
7719 		dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7720 			gaudi2_irq_map_table[event_type].valid ?
7721 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7722 	else
7723 		dev_err(hdev->dev, "%s: %pV\n",
7724 			gaudi2_irq_map_table[event_type].valid ?
7725 			gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7726 
7727 	va_end(args);
7728 }
7729 
7730 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7731 		struct hl_eq_ecc_data *ecc_data)
7732 {
7733 	u64 ecc_address = 0, ecc_syndrom = 0;
7734 	u8 memory_wrapper_idx = 0;
7735 
7736 	ecc_address = le64_to_cpu(ecc_data->ecc_address);
7737 	ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7738 	memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7739 
7740 	gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7741 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.",
7742 		ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical);
7743 
7744 	return !!ecc_data->is_critical;
7745 }
7746 
7747 /*
7748  * gaudi2_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7749  *
7750  * @idx: the current pi/ci value
7751  * @q_len: the queue length (power of 2)
7752  *
7753  * @return the cyclically decremented index
7754  */
7755 static inline u32 gaudi2_queue_idx_dec(u32 idx, u32 q_len)
7756 {
7757 	u32 mask = q_len - 1;
7758 
7759 	/*
7760 	 * modular decrement is equivalent to adding (queue_size -1)
7761 	 * later we take LSBs to make sure the value is in the
7762 	 * range [0, queue_len - 1]
7763 	 */
7764 	return (idx + q_len - 1) & mask;
7765 }
7766 
7767 /**
7768  * gaudi2_print_sw_config_stream_data - print SW config stream data
7769  *
7770  * @hdev: pointer to the habanalabs device structure
7771  * @stream: the QMAN's stream
7772  * @qman_base: base address of QMAN registers block
7773  */
7774 static void gaudi2_print_sw_config_stream_data(struct hl_device *hdev,
7775 						u32 stream, u64 qman_base)
7776 {
7777 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7778 	u32 cq_ptr_lo_off, size;
7779 
7780 	cq_ptr_lo_off = mmDCORE0_TPC0_QM_CQ_PTR_LO_1 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0;
7781 
7782 	cq_ptr_lo = qman_base + (mmDCORE0_TPC0_QM_CQ_PTR_LO_0 - mmDCORE0_TPC0_QM_BASE) +
7783 									stream * cq_ptr_lo_off;
7784 
7785 	cq_ptr_hi = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_PTR_HI_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7786 
7787 	cq_tsize = cq_ptr_lo + (mmDCORE0_TPC0_QM_CQ_TSIZE_0 - mmDCORE0_TPC0_QM_CQ_PTR_LO_0);
7788 
7789 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7790 	size = RREG32(cq_tsize);
7791 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7792 		stream, cq_ptr, size);
7793 }
7794 
7795 /**
7796  * gaudi2_print_last_pqes_on_err - print last PQEs on error
7797  *
7798  * @hdev: pointer to the habanalabs device structure
7799  * @qid_base: first QID of the QMAN (out of 4 streams)
7800  * @stream: the QMAN's stream
7801  * @qman_base: base address of QMAN registers block
7802  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7803  */
7804 static void gaudi2_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, u32 stream,
7805 						u64 qman_base, bool pr_sw_conf)
7806 {
7807 	u32 ci, qm_ci_stream_off;
7808 	struct hl_hw_queue *q;
7809 	u64 pq_ci;
7810 	int i;
7811 
7812 	q = &hdev->kernel_queues[qid_base + stream];
7813 
7814 	qm_ci_stream_off = mmDCORE0_TPC0_QM_PQ_CI_1 - mmDCORE0_TPC0_QM_PQ_CI_0;
7815 	pq_ci = qman_base + (mmDCORE0_TPC0_QM_PQ_CI_0 - mmDCORE0_TPC0_QM_BASE) +
7816 						stream * qm_ci_stream_off;
7817 
7818 	hdev->asic_funcs->hw_queues_lock(hdev);
7819 
7820 	if (pr_sw_conf)
7821 		gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7822 
7823 	ci = RREG32(pq_ci);
7824 
7825 	/* we should start printing form ci -1 */
7826 	ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7827 
7828 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7829 		struct hl_bd *bd;
7830 		u64 addr;
7831 		u32 len;
7832 
7833 		bd = q->kernel_address;
7834 		bd += ci;
7835 
7836 		len = le32_to_cpu(bd->len);
7837 		/* len 0 means uninitialized entry- break */
7838 		if (!len)
7839 			break;
7840 
7841 		addr = le64_to_cpu(bd->ptr);
7842 
7843 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7844 			stream, ci, addr, len);
7845 
7846 		/* get previous ci, wrap if needed */
7847 		ci = gaudi2_queue_idx_dec(ci, HL_QUEUE_LENGTH);
7848 	}
7849 
7850 	hdev->asic_funcs->hw_queues_unlock(hdev);
7851 }
7852 
7853 /**
7854  * print_qman_data_on_err - extract QMAN data on error
7855  *
7856  * @hdev: pointer to the habanalabs device structure
7857  * @qid_base: first QID of the QMAN (out of 4 streams)
7858  * @stream: the QMAN's stream
7859  * @qman_base: base address of QMAN registers block
7860  *
7861  * This function attempt to extract as much data as possible on QMAN error.
7862  * On upper CP print the SW config stream data and last 8 PQEs.
7863  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7864  */
7865 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base, u32 stream, u64 qman_base)
7866 {
7867 	u32 i;
7868 
7869 	if (stream != QMAN_STREAMS) {
7870 		gaudi2_print_last_pqes_on_err(hdev, qid_base, stream, qman_base, true);
7871 		return;
7872 	}
7873 
7874 	gaudi2_print_sw_config_stream_data(hdev, stream, qman_base);
7875 
7876 	for (i = 0 ; i < QMAN_STREAMS ; i++)
7877 		gaudi2_print_last_pqes_on_err(hdev, qid_base, i, qman_base, false);
7878 }
7879 
7880 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7881 							u64 qman_base, u32 qid_base)
7882 {
7883 	u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7884 	u64 glbl_sts_addr, arb_err_addr;
7885 	char reg_desc[32];
7886 
7887 	glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7888 	arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7889 
7890 	/* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7891 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7892 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7893 
7894 		if (!glbl_sts_val)
7895 			continue;
7896 
7897 		if (i == QMAN_STREAMS) {
7898 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7899 			num_error_causes = GAUDI2_NUM_OF_QM_LCP_ERR_CAUSE;
7900 		} else {
7901 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7902 			num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7903 		}
7904 
7905 		for (j = 0 ; j < num_error_causes ; j++)
7906 			if (glbl_sts_val & BIT(j)) {
7907 				gaudi2_print_event(hdev, event_type, true,
7908 					"%s. err cause: %s", reg_desc,
7909 					i == QMAN_STREAMS ?
7910 					gaudi2_qman_lower_cp_error_cause[j] :
7911 					gaudi2_qman_error_cause[j]);
7912 				error_count++;
7913 			}
7914 
7915 		print_qman_data_on_err(hdev, qid_base, i, qman_base);
7916 	}
7917 
7918 	arb_err_val = RREG32(arb_err_addr);
7919 
7920 	if (!arb_err_val)
7921 		goto out;
7922 
7923 	for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7924 		if (arb_err_val & BIT(j)) {
7925 			gaudi2_print_event(hdev, event_type, true,
7926 				"ARB_ERR. err cause: %s",
7927 				gaudi2_qman_arb_error_cause[j]);
7928 			error_count++;
7929 		}
7930 	}
7931 
7932 out:
7933 	return error_count;
7934 }
7935 
7936 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
7937 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7938 			enum gaudi2_engine_id id, u64 *event_mask)
7939 {
7940 	u32 razwi_hi, razwi_lo, razwi_xy;
7941 	u16 eng_id = id;
7942 	u8 rd_wr_flag;
7943 
7944 	if (is_write) {
7945 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
7946 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
7947 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
7948 		rd_wr_flag = HL_RAZWI_WRITE;
7949 	} else {
7950 		razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
7951 		razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
7952 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
7953 		rd_wr_flag = HL_RAZWI_READ;
7954 	}
7955 
7956 	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
7957 				rd_wr_flag | HL_RAZWI_HBW, event_mask);
7958 
7959 	dev_err_ratelimited(hdev->dev,
7960 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
7961 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
7962 }
7963 
7964 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
7965 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
7966 			enum gaudi2_engine_id id, u64 *event_mask)
7967 {
7968 	u64 razwi_addr = CFG_BASE;
7969 	u32 razwi_xy;
7970 	u16 eng_id = id;
7971 	u8 rd_wr_flag;
7972 
7973 	if (is_write) {
7974 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
7975 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
7976 		rd_wr_flag = HL_RAZWI_WRITE;
7977 	} else {
7978 		razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
7979 		razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
7980 		rd_wr_flag = HL_RAZWI_READ;
7981 	}
7982 
7983 	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
7984 	dev_err_ratelimited(hdev->dev,
7985 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
7986 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
7987 						razwi_xy);
7988 }
7989 
7990 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
7991 						enum razwi_event_sources module, u8 module_idx)
7992 {
7993 	switch (module) {
7994 	case RAZWI_TPC:
7995 		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
7996 			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7997 		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
7998 				(module_idx % NUM_OF_TPC_PER_DCORE) +
7999 				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8000 
8001 	case RAZWI_MME:
8002 		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8003 			(module_idx * ENGINE_ID_DCORE_OFFSET));
8004 
8005 	case RAZWI_EDMA:
8006 		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8007 			(module_idx % NUM_OF_EDMA_PER_DCORE));
8008 
8009 	case RAZWI_PDMA:
8010 		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8011 
8012 	case RAZWI_NIC:
8013 		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8014 
8015 	case RAZWI_DEC:
8016 		if (module_idx == 8)
8017 			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8018 
8019 		if (module_idx == 9)
8020 			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8021 					;
8022 		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8023 				(module_idx % NUM_OF_DEC_PER_DCORE) +
8024 				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8025 
8026 	case RAZWI_ROT:
8027 		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8028 
8029 	default:
8030 		return GAUDI2_ENGINE_ID_SIZE;
8031 	}
8032 }
8033 
8034 /*
8035  * This function handles RR(Range register) hit events.
8036  * raised be initiators not PSOC RAZWI.
8037  */
8038 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8039 				enum razwi_event_sources module, u8 module_idx,
8040 				u8 module_sub_idx, u64 *event_mask)
8041 {
8042 	bool via_sft = false;
8043 	u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id;
8044 	u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8045 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8046 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8047 	char initiator_name[64];
8048 
8049 	switch (module) {
8050 	case RAZWI_TPC:
8051 		hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8052 
8053 		if (hl_is_fw_sw_ver_below(hdev, 1, 9) &&
8054 				!hdev->asic_prop.fw_security_enabled &&
8055 				((module_idx == 0) || (module_idx == 1)))
8056 			lbw_rtr_id = DCORE0_RTR0;
8057 		else
8058 			lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8059 		sprintf(initiator_name, "TPC_%u", module_idx);
8060 		break;
8061 	case RAZWI_MME:
8062 		sprintf(initiator_name, "MME_%u", module_idx);
8063 		switch (module_sub_idx) {
8064 		case MME_WAP0:
8065 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8066 			break;
8067 		case MME_WAP1:
8068 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8069 			break;
8070 		case MME_WRITE:
8071 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8072 			break;
8073 		case MME_READ:
8074 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8075 			break;
8076 		case MME_SBTE0:
8077 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8078 			break;
8079 		case MME_SBTE1:
8080 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8081 			break;
8082 		case MME_SBTE2:
8083 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8084 			break;
8085 		case MME_SBTE3:
8086 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8087 			break;
8088 		case MME_SBTE4:
8089 			hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8090 			break;
8091 		default:
8092 			return;
8093 		}
8094 		lbw_rtr_id = hbw_rtr_id;
8095 		break;
8096 	case RAZWI_EDMA:
8097 		hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8098 		dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8099 		/* SFT has separate MSTR_IF for LBW, only there we can
8100 		 * read the LBW razwi related registers
8101 		 */
8102 		lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8103 								dcore_id * SFT_DCORE_OFFSET;
8104 		via_sft = true;
8105 		sprintf(initiator_name, "EDMA_%u", module_idx);
8106 		break;
8107 	case RAZWI_PDMA:
8108 		hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8109 		lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8110 		sprintf(initiator_name, "PDMA_%u", module_idx);
8111 		break;
8112 	case RAZWI_NIC:
8113 		hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8114 		lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8115 		sprintf(initiator_name, "NIC_%u", module_idx);
8116 		break;
8117 	case RAZWI_DEC:
8118 		hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8119 		lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8120 		sprintf(initiator_name, "DEC_%u", module_idx);
8121 		break;
8122 	case RAZWI_ROT:
8123 		hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8124 		lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8125 		sprintf(initiator_name, "ROT_%u", module_idx);
8126 		break;
8127 	default:
8128 		return;
8129 	}
8130 
8131 	/* Find router mstr_if register base */
8132 	if (!via_sft) {
8133 		dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8134 		dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8135 		hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8136 				dcore_id * DCORE_OFFSET +
8137 				dcore_rtr_id * DCORE_RTR_OFFSET +
8138 				RTR_MSTR_IF_OFFSET;
8139 		lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8140 				(((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8141 	}
8142 
8143 	/* Find out event cause by reading "RAZWI_HAPPENED" registers */
8144 	hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8145 	hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8146 	lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8147 	lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8148 
8149 	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8150 	if (hbw_shrd_aw) {
8151 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8152 						initiator_name, eng_id, event_mask);
8153 
8154 		/* Clear event indication */
8155 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8156 	}
8157 
8158 	if (hbw_shrd_ar) {
8159 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8160 						initiator_name, eng_id, event_mask);
8161 
8162 		/* Clear event indication */
8163 		WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8164 	}
8165 
8166 	if (lbw_shrd_aw) {
8167 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8168 						initiator_name, eng_id, event_mask);
8169 
8170 		/* Clear event indication */
8171 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8172 	}
8173 
8174 	if (lbw_shrd_ar) {
8175 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8176 						initiator_name, eng_id, event_mask);
8177 
8178 		/* Clear event indication */
8179 		WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8180 	}
8181 }
8182 
8183 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8184 {
8185 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8186 	u8 mod_idx, sub_mod;
8187 
8188 	/* check all TPCs */
8189 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8190 		if (prop->tpc_enabled_mask & BIT(mod_idx))
8191 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8192 	}
8193 
8194 	/* check all MMEs */
8195 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8196 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8197 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8198 									sub_mod, NULL);
8199 
8200 	/* check all EDMAs */
8201 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8202 		if (prop->edma_enabled_mask & BIT(mod_idx))
8203 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8204 
8205 	/* check all PDMAs */
8206 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8207 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8208 
8209 	/* check all NICs */
8210 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8211 		if (hdev->nic_ports_mask & BIT(mod_idx))
8212 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8213 								NULL);
8214 
8215 	/* check all DECs */
8216 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8217 		if (prop->decoder_enabled_mask & BIT(mod_idx))
8218 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8219 
8220 	/* check all ROTs */
8221 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8222 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8223 }
8224 
8225 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8226 						u32 axuser_xy, u32 *base, u16 *eng_id,
8227 						char *eng_name)
8228 {
8229 
8230 	int i, num_of_eng = 0;
8231 	u16 str_size = 0;
8232 
8233 	for (i = 0 ; i < array_size ; i++) {
8234 		if (axuser_xy != razwi_info[i].axuser_xy)
8235 			continue;
8236 
8237 		eng_id[num_of_eng] = razwi_info[i].eng_id;
8238 		base[num_of_eng] = razwi_info[i].rtr_ctrl;
8239 		if (!num_of_eng)
8240 			str_size += snprintf(eng_name + str_size,
8241 						PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8242 						razwi_info[i].eng_name);
8243 		else
8244 			str_size += snprintf(eng_name + str_size,
8245 						PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8246 						razwi_info[i].eng_name);
8247 		num_of_eng++;
8248 	}
8249 
8250 	return num_of_eng;
8251 }
8252 
8253 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8254 						u64 *event_mask)
8255 {
8256 	u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8257 	u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8258 	u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8259 	char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8260 	bool razwi_happened = false;
8261 	u64 addr;
8262 	int i;
8263 
8264 	num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8265 							axuser_xy, base, eng_id, eng_name_str);
8266 
8267 	/* If no match for XY coordinates, try to find it in MME razwi table */
8268 	if (!num_of_eng) {
8269 		axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8270 		num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8271 								ARRAY_SIZE(mme_razwi_info),
8272 								axuser_xy, base, eng_id,
8273 								eng_name_str);
8274 	}
8275 
8276 	for  (i = 0 ; i < num_of_eng ; i++) {
8277 		if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8278 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8279 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8280 			addr = ((u64)addr_hi << 32) + addr_lo;
8281 			if (addr) {
8282 				dev_err(hdev->dev,
8283 					"PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8284 					eng_name_str, addr);
8285 				hl_handle_razwi(hdev, addr, &eng_id[0],
8286 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8287 				razwi_happened = true;
8288 			}
8289 		}
8290 
8291 		if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8292 			addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8293 			addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8294 			addr = ((u64)addr_hi << 32) + addr_lo;
8295 			if (addr) {
8296 				dev_err(hdev->dev,
8297 					"PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8298 					eng_name_str, addr);
8299 				hl_handle_razwi(hdev, addr, &eng_id[0],
8300 					num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8301 				razwi_happened = true;
8302 			}
8303 		}
8304 
8305 		if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8306 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8307 			if (addr_lo) {
8308 				dev_err(hdev->dev,
8309 					"PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8310 					eng_name_str, addr_lo);
8311 				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8312 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8313 				razwi_happened = true;
8314 			}
8315 		}
8316 
8317 		if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8318 			addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8319 			if (addr_lo) {
8320 				dev_err(hdev->dev,
8321 						"PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8322 						eng_name_str, addr_lo);
8323 				hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8324 					num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8325 				razwi_happened = true;
8326 			}
8327 		}
8328 		/* In common case the loop will break, when there is only one engine id, or
8329 		 * several engines with the same router. The exceptional case is with psoc razwi
8330 		 * from EDMA, where it's possible to get axuser id which fits 2 routers (2
8331 		 * interfaces of sft router). In this case, maybe the first router won't hold info
8332 		 * and we will need to iterate on the other router.
8333 		 */
8334 		if (razwi_happened)
8335 			break;
8336 	}
8337 
8338 	return razwi_happened;
8339 }
8340 
8341 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8342 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8343 {
8344 	u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8345 
8346 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8347 		razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8348 		if (!razwi_intr)
8349 			return 0;
8350 	}
8351 
8352 	razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8353 
8354 	dev_err_ratelimited(hdev->dev,
8355 		"PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8356 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8357 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8358 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8359 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8360 		FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8361 
8362 	if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8363 		error_count++;
8364 	else
8365 		dev_err_ratelimited(hdev->dev,
8366 				"PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8367 				razwi_mask_info);
8368 
8369 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8370 	if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8371 		WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8372 
8373 	return error_count;
8374 }
8375 
8376 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8377 {
8378 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8379 
8380 	sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8381 
8382 	for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8383 		if (sts_val & BIT(i)) {
8384 			gaudi2_print_event(hdev, event_type, true,
8385 				"err cause: %s", gaudi2_qm_sei_error_cause[i]);
8386 			sts_clr_val |= BIT(i);
8387 			error_count++;
8388 		}
8389 	}
8390 
8391 	WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8392 
8393 	return error_count;
8394 }
8395 
8396 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8397 					bool extended_err_check, u64 *event_mask)
8398 {
8399 	enum razwi_event_sources module;
8400 	u32 error_count = 0;
8401 	u64 qman_base;
8402 	u8 index;
8403 
8404 	switch (event_type) {
8405 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8406 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8407 		qman_base = mmDCORE0_TPC0_QM_BASE +
8408 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8409 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8410 		module = RAZWI_TPC;
8411 		break;
8412 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8413 		qman_base = mmDCORE0_TPC6_QM_BASE;
8414 		module = RAZWI_TPC;
8415 		break;
8416 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8417 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8418 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8419 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8420 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8421 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8422 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8423 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8424 		module = RAZWI_MME;
8425 		break;
8426 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8427 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8428 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8429 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8430 		module = RAZWI_PDMA;
8431 		break;
8432 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8433 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8434 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8435 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8436 		module = RAZWI_ROT;
8437 		break;
8438 	default:
8439 		return 0;
8440 	}
8441 
8442 	error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8443 
8444 	/* There is a single event per NIC macro, so should check its both QMAN blocks */
8445 	if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8446 			event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8447 		error_count += _gaudi2_handle_qm_sei_err(hdev,
8448 					qman_base + NIC_QM_OFFSET, event_type);
8449 
8450 	if (extended_err_check) {
8451 		/* check if RAZWI happened */
8452 		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8453 		hl_check_for_glbl_errors(hdev);
8454 	}
8455 
8456 	return error_count;
8457 }
8458 
8459 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8460 {
8461 	u32 qid_base, error_count = 0;
8462 	u64 qman_base;
8463 	u8 index = 0;
8464 
8465 	switch (event_type) {
8466 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8467 		index = event_type - GAUDI2_EVENT_TPC0_QM;
8468 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8469 		qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8470 		break;
8471 	case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8472 		index = event_type - GAUDI2_EVENT_TPC6_QM;
8473 		qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8474 		qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8475 		break;
8476 	case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8477 		index = event_type - GAUDI2_EVENT_TPC12_QM;
8478 		qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8479 		qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8480 		break;
8481 	case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8482 		index = event_type - GAUDI2_EVENT_TPC18_QM;
8483 		qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8484 		qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8485 		break;
8486 	case GAUDI2_EVENT_TPC24_QM:
8487 		qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8488 		qman_base = mmDCORE0_TPC6_QM_BASE;
8489 		break;
8490 	case GAUDI2_EVENT_MME0_QM:
8491 		qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8492 		qman_base = mmDCORE0_MME_QM_BASE;
8493 		break;
8494 	case GAUDI2_EVENT_MME1_QM:
8495 		qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8496 		qman_base = mmDCORE1_MME_QM_BASE;
8497 		break;
8498 	case GAUDI2_EVENT_MME2_QM:
8499 		qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8500 		qman_base = mmDCORE2_MME_QM_BASE;
8501 		break;
8502 	case GAUDI2_EVENT_MME3_QM:
8503 		qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8504 		qman_base = mmDCORE3_MME_QM_BASE;
8505 		break;
8506 	case GAUDI2_EVENT_HDMA0_QM:
8507 		index = 0;
8508 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8509 		qman_base = mmDCORE0_EDMA0_QM_BASE;
8510 		break;
8511 	case GAUDI2_EVENT_HDMA1_QM:
8512 		index = 1;
8513 		qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8514 		qman_base = mmDCORE0_EDMA1_QM_BASE;
8515 		break;
8516 	case GAUDI2_EVENT_HDMA2_QM:
8517 		index = 2;
8518 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8519 		qman_base = mmDCORE1_EDMA0_QM_BASE;
8520 		break;
8521 	case GAUDI2_EVENT_HDMA3_QM:
8522 		index = 3;
8523 		qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8524 		qman_base = mmDCORE1_EDMA1_QM_BASE;
8525 		break;
8526 	case GAUDI2_EVENT_HDMA4_QM:
8527 		index = 4;
8528 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8529 		qman_base = mmDCORE2_EDMA0_QM_BASE;
8530 		break;
8531 	case GAUDI2_EVENT_HDMA5_QM:
8532 		index = 5;
8533 		qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8534 		qman_base = mmDCORE2_EDMA1_QM_BASE;
8535 		break;
8536 	case GAUDI2_EVENT_HDMA6_QM:
8537 		index = 6;
8538 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8539 		qman_base = mmDCORE3_EDMA0_QM_BASE;
8540 		break;
8541 	case GAUDI2_EVENT_HDMA7_QM:
8542 		index = 7;
8543 		qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8544 		qman_base = mmDCORE3_EDMA1_QM_BASE;
8545 		break;
8546 	case GAUDI2_EVENT_PDMA0_QM:
8547 		qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8548 		qman_base = mmPDMA0_QM_BASE;
8549 		break;
8550 	case GAUDI2_EVENT_PDMA1_QM:
8551 		qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8552 		qman_base = mmPDMA1_QM_BASE;
8553 		break;
8554 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8555 		qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8556 		qman_base = mmROT0_QM_BASE;
8557 		break;
8558 	case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8559 		qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8560 		qman_base = mmROT1_QM_BASE;
8561 		break;
8562 	default:
8563 		return 0;
8564 	}
8565 
8566 	error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base);
8567 
8568 	/* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8569 	if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8570 		error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8571 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8572 	}
8573 
8574 	hl_check_for_glbl_errors(hdev);
8575 
8576 	return error_count;
8577 }
8578 
8579 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type)
8580 {
8581 	u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8582 
8583 	for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8584 		sts_clr_val = 0;
8585 		sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8586 				(arc_farm * ARC_FARM_OFFSET));
8587 
8588 		for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8589 			if (sts_val & BIT(i)) {
8590 				gaudi2_print_event(hdev, event_type, true,
8591 						"ARC FARM ARC %u err cause: %s",
8592 						arc_farm, gaudi2_arc_sei_error_cause[i]);
8593 				sts_clr_val |= BIT(i);
8594 				error_count++;
8595 			}
8596 		}
8597 		WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8598 				sts_clr_val);
8599 	}
8600 
8601 	hl_check_for_glbl_errors(hdev);
8602 
8603 	return error_count;
8604 }
8605 
8606 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8607 {
8608 	u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8609 
8610 	sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8611 
8612 	for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8613 		if (sts_val & BIT(i)) {
8614 			gaudi2_print_event(hdev, event_type, true,
8615 				"err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8616 			sts_clr_val |= BIT(i);
8617 			error_count++;
8618 		}
8619 	}
8620 
8621 	hl_check_for_glbl_errors(hdev);
8622 
8623 	WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8624 
8625 	return error_count;
8626 }
8627 
8628 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8629 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8630 					u64 *event_mask)
8631 {
8632 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8633 	u32 error_count = 0;
8634 	int i;
8635 
8636 	for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8637 		if (intr_cause_data & BIT(i)) {
8638 			gaudi2_print_event(hdev, event_type, true,
8639 				"err cause: %s", guadi2_rot_error_cause[i]);
8640 			error_count++;
8641 		}
8642 
8643 	/* check if RAZWI happened */
8644 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8645 	hl_check_for_glbl_errors(hdev);
8646 
8647 	return error_count;
8648 }
8649 
8650 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev,  u8 tpc_index, u16 event_type,
8651 					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8652 					u64 *event_mask)
8653 {
8654 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8655 	u32 error_count = 0;
8656 	int i;
8657 
8658 	for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8659 		if (intr_cause_data & BIT(i)) {
8660 			gaudi2_print_event(hdev, event_type, true,
8661 				"interrupt cause: %s",  gaudi2_tpc_interrupts_cause[i]);
8662 			error_count++;
8663 		}
8664 
8665 	/* check if RAZWI happened */
8666 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8667 	hl_check_for_glbl_errors(hdev);
8668 
8669 	return error_count;
8670 }
8671 
8672 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8673 					u64 *event_mask)
8674 {
8675 	u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8676 	int i;
8677 
8678 	if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8679 		/* DCORE DEC */
8680 		sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8681 				DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8682 				DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8683 	else
8684 		/* PCIE DEC */
8685 		sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8686 				(dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8687 
8688 	sts_val = RREG32(sts_addr);
8689 
8690 	for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8691 		if (sts_val & BIT(i)) {
8692 			gaudi2_print_event(hdev, event_type, true,
8693 				"err cause: %s", gaudi2_dec_error_cause[i]);
8694 			sts_clr_val |= BIT(i);
8695 			error_count++;
8696 		}
8697 	}
8698 
8699 	/* check if RAZWI happened */
8700 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8701 	hl_check_for_glbl_errors(hdev);
8702 
8703 	/* Write 1 clear errors */
8704 	WREG32(sts_addr, sts_clr_val);
8705 
8706 	return error_count;
8707 }
8708 
8709 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8710 					u64 *event_mask)
8711 {
8712 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8713 	int i;
8714 
8715 	sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8716 	sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8717 
8718 	sts_val = RREG32(sts_addr);
8719 
8720 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8721 		if (sts_val & BIT(i)) {
8722 			gaudi2_print_event(hdev, event_type, true,
8723 				"err cause: %s", guadi2_mme_error_cause[i]);
8724 			sts_clr_val |= BIT(i);
8725 			error_count++;
8726 		}
8727 	}
8728 
8729 	/* check if RAZWI happened */
8730 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8731 		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8732 
8733 	hl_check_for_glbl_errors(hdev);
8734 
8735 	WREG32(sts_clr_addr, sts_clr_val);
8736 
8737 	return error_count;
8738 }
8739 
8740 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type,
8741 					u64 intr_cause_data)
8742 {
8743 	int i, error_count = 0;
8744 
8745 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++)
8746 		if (intr_cause_data & BIT(i)) {
8747 			gaudi2_print_event(hdev, event_type, true,
8748 				"err cause: %s", guadi2_mme_sbte_error_cause[i]);
8749 			error_count++;
8750 		}
8751 
8752 	hl_check_for_glbl_errors(hdev);
8753 
8754 	return error_count;
8755 }
8756 
8757 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8758 					u64 *event_mask)
8759 {
8760 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8761 	int i;
8762 
8763 	sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8764 	sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8765 
8766 	sts_val = RREG32(sts_addr);
8767 
8768 	for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8769 		if (sts_val & BIT(i)) {
8770 			gaudi2_print_event(hdev, event_type, true,
8771 				"err cause: %s", guadi2_mme_wap_error_cause[i]);
8772 			sts_clr_val |= BIT(i);
8773 			error_count++;
8774 		}
8775 	}
8776 
8777 	/* check if RAZWI happened on WAP0/1 */
8778 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8779 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8780 	hl_check_for_glbl_errors(hdev);
8781 
8782 	WREG32(sts_clr_addr, sts_clr_val);
8783 
8784 	return error_count;
8785 }
8786 
8787 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8788 					u64 intr_cause_data)
8789 {
8790 	u32 error_count = 0;
8791 	int i;
8792 
8793 	/* If an AXI read or write error is received, an error is reported and
8794 	 * interrupt message is sent. Due to an HW errata, when reading the cause
8795 	 * register of the KDMA engine, the reported error is always HBW even if
8796 	 * the actual error caused by a LBW KDMA transaction.
8797 	 */
8798 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8799 		if (intr_cause_data & BIT(i)) {
8800 			gaudi2_print_event(hdev, event_type, true,
8801 				"err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8802 			error_count++;
8803 		}
8804 
8805 	hl_check_for_glbl_errors(hdev);
8806 
8807 	return error_count;
8808 }
8809 
8810 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8811 {
8812 	u32 error_count = 0;
8813 	int i;
8814 
8815 	for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8816 		if (intr_cause & BIT(i)) {
8817 			gaudi2_print_event(hdev, event_type, true,
8818 				"err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8819 			error_count++;
8820 		}
8821 
8822 	hl_check_for_glbl_errors(hdev);
8823 
8824 	return error_count;
8825 }
8826 
8827 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8828 {
8829 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8830 
8831 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8832 	if (RREG32(razwi_happened_addr)) {
8833 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8834 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8835 		WREG32(razwi_happened_addr, 0x1);
8836 	}
8837 
8838 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8839 	if (RREG32(razwi_happened_addr)) {
8840 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8841 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8842 		WREG32(razwi_happened_addr, 0x1);
8843 	}
8844 
8845 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8846 	if (RREG32(razwi_happened_addr)) {
8847 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8848 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8849 		WREG32(razwi_happened_addr, 0x1);
8850 	}
8851 
8852 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8853 	if (RREG32(razwi_happened_addr)) {
8854 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8855 							GAUDI2_ENGINE_ID_PCIE, event_mask);
8856 		WREG32(razwi_happened_addr, 0x1);
8857 	}
8858 }
8859 
8860 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8861 					u64 intr_cause_data, u64 *event_mask)
8862 {
8863 	u32 error_count = 0;
8864 	int i;
8865 
8866 	for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8867 		if (!(intr_cause_data & BIT_ULL(i)))
8868 			continue;
8869 
8870 		gaudi2_print_event(hdev, event_type, true,
8871 			"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8872 		error_count++;
8873 
8874 		/*
8875 		 * Always check for LBW and HBW additional info as the indication itself is
8876 		 * sometimes missing
8877 		 */
8878 		hl_check_for_glbl_errors(hdev);
8879 		gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8880 	}
8881 
8882 	return error_count;
8883 }
8884 
8885 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8886 				u64 intr_cause_data)
8887 
8888 {
8889 	u32 error_count = 0;
8890 	int i;
8891 
8892 	for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8893 		if (intr_cause_data & BIT_ULL(i)) {
8894 			gaudi2_print_event(hdev, event_type, true,
8895 				"err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8896 			error_count++;
8897 		}
8898 	}
8899 
8900 	return error_count;
8901 }
8902 
8903 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8904 {
8905 	u32 error_count = 0;
8906 	int i;
8907 
8908 	for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8909 		if (intr_cause_data & BIT_ULL(i)) {
8910 			gaudi2_print_event(hdev, event_type, true,
8911 				"err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8912 			error_count++;
8913 		}
8914 	}
8915 
8916 	return error_count;
8917 }
8918 
8919 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8920 					u64 *event_mask)
8921 {
8922 	u32 valid, val;
8923 	u64 addr;
8924 
8925 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8926 
8927 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
8928 		return;
8929 
8930 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
8931 	addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
8932 	addr <<= 32;
8933 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
8934 
8935 	if (!is_pmmu)
8936 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8937 
8938 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
8939 				is_pmmu ? "PMMU" : "HMMU", addr);
8940 	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
8941 
8942 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8943 }
8944 
8945 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
8946 {
8947 	u32 valid, val;
8948 	u64 addr;
8949 
8950 	valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
8951 
8952 	if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
8953 		return;
8954 
8955 	val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
8956 	addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
8957 	addr <<= 32;
8958 	addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
8959 
8960 	if (!is_pmmu)
8961 		addr = gaudi2_mmu_descramble_addr(hdev, addr);
8962 
8963 	dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
8964 				is_pmmu ? "PMMU" : "HMMU", addr);
8965 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
8966 }
8967 
8968 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
8969 						u64 mmu_base, bool is_pmmu, u64 *event_mask)
8970 {
8971 	u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
8972 	int i;
8973 
8974 	spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
8975 
8976 	for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
8977 		if (spi_sei_cause & BIT(i)) {
8978 			gaudi2_print_event(hdev, event_type, true,
8979 				"err cause: %s", gaudi2_mmu_spi_sei[i].cause);
8980 
8981 			if (i == 0)
8982 				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
8983 			else if (i == 1)
8984 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
8985 
8986 			if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
8987 				interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
8988 
8989 			error_count++;
8990 		}
8991 	}
8992 
8993 	/* Clear cause */
8994 	WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
8995 
8996 	/* Clear interrupt */
8997 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
8998 
8999 	return error_count;
9000 }
9001 
9002 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9003 {
9004 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9005 		cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9006 	int i;
9007 
9008 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9009 	cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9010 
9011 	sei_cause_val = RREG32(sei_cause_addr);
9012 	sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9013 	cq_intr_val = RREG32(cq_intr_addr);
9014 
9015 	/* SEI interrupt */
9016 	if (sei_cause_cause) {
9017 		/* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
9018 		sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
9019 					sei_cause_val);
9020 
9021 		for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9022 			if (!(sei_cause_cause & BIT(i)))
9023 				continue;
9024 
9025 			gaudi2_print_event(hdev, event_type, true,
9026 				"err cause: %s. %s: 0x%X",
9027 				gaudi2_sm_sei_cause[i].cause_name,
9028 				gaudi2_sm_sei_cause[i].log_name,
9029 				sei_cause_log);
9030 			error_count++;
9031 			break;
9032 		}
9033 
9034 		/* Clear SM_SEI_CAUSE */
9035 		WREG32(sei_cause_addr, 0);
9036 	}
9037 
9038 	/* CQ interrupt */
9039 	if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9040 		cq_intr_queue_index =
9041 				FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
9042 					cq_intr_val);
9043 
9044 		dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9045 				sm_index, cq_intr_queue_index);
9046 		error_count++;
9047 
9048 		/* Clear CQ_INTR */
9049 		WREG32(cq_intr_addr, 0);
9050 	}
9051 
9052 	hl_check_for_glbl_errors(hdev);
9053 
9054 	return error_count;
9055 }
9056 
9057 static u64 get_hmmu_base(u16 event_type)
9058 {
9059 	u8 dcore, index_in_dcore;
9060 
9061 	switch (event_type) {
9062 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9063 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9064 		dcore = 0;
9065 		index_in_dcore = 0;
9066 	break;
9067 	case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9068 	case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9069 		dcore = 1;
9070 		index_in_dcore = 0;
9071 	break;
9072 	case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9073 	case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9074 		dcore = 0;
9075 		index_in_dcore = 1;
9076 	break;
9077 	case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9078 	case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9079 		dcore = 1;
9080 		index_in_dcore = 1;
9081 	break;
9082 	case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9083 	case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9084 		dcore = 3;
9085 		index_in_dcore = 2;
9086 	break;
9087 	case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9088 	case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9089 		dcore = 2;
9090 		index_in_dcore = 2;
9091 	break;
9092 	case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9093 	case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9094 		dcore = 3;
9095 		index_in_dcore = 3;
9096 	break;
9097 	case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9098 	case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9099 		dcore = 2;
9100 		index_in_dcore = 3;
9101 	break;
9102 	case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9103 	case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9104 		dcore = 0;
9105 		index_in_dcore = 2;
9106 	break;
9107 	case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9108 	case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9109 		dcore = 1;
9110 		index_in_dcore = 2;
9111 	break;
9112 	case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9113 	case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9114 		dcore = 0;
9115 		index_in_dcore = 3;
9116 	break;
9117 	case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9118 	case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9119 		dcore = 1;
9120 		index_in_dcore = 3;
9121 	break;
9122 	case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9123 	case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9124 		dcore = 3;
9125 		index_in_dcore = 0;
9126 	break;
9127 	case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9128 	case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9129 		dcore = 2;
9130 		index_in_dcore = 0;
9131 	break;
9132 	case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9133 	case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9134 		dcore = 3;
9135 		index_in_dcore = 1;
9136 	break;
9137 	case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9138 	case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9139 		dcore = 2;
9140 		index_in_dcore = 1;
9141 	break;
9142 	default:
9143 		return ULONG_MAX;
9144 	}
9145 
9146 	return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9147 }
9148 
9149 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9150 {
9151 	bool is_pmmu = false;
9152 	u32 error_count = 0;
9153 	u64 mmu_base;
9154 
9155 	switch (event_type) {
9156 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9157 	case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9158 		mmu_base = get_hmmu_base(event_type);
9159 		break;
9160 
9161 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9162 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9163 		is_pmmu = true;
9164 		mmu_base = mmPMMU_HBW_MMU_BASE;
9165 		break;
9166 	default:
9167 		return 0;
9168 	}
9169 
9170 	if (mmu_base == ULONG_MAX)
9171 		return 0;
9172 
9173 	error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9174 							is_pmmu, event_mask);
9175 	hl_check_for_glbl_errors(hdev);
9176 
9177 	return error_count;
9178 }
9179 
9180 
9181 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9182 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9183 			struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9184 {
9185 	u32 addr, beat, beat_shift;
9186 	bool rc = false;
9187 
9188 	dev_err_ratelimited(hdev->dev,
9189 			"READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9190 			FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9191 			FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9192 			FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9193 
9194 	addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9195 	dev_err_ratelimited(hdev->dev,
9196 			"READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9197 			FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9198 			FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9199 			FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9200 			FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9201 			FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9202 
9203 	/* For each beat (RDQS edge), look for possible errors and print relevant info */
9204 	for (beat = 0 ; beat < 4 ; beat++) {
9205 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9206 			(HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9207 			dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9208 						beat,
9209 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9210 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9211 
9212 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9213 			(HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9214 			dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9215 						beat,
9216 						le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9217 						le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9218 			rc |= true;
9219 		}
9220 
9221 		beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9222 		if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9223 			(HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9224 			dev_err_ratelimited(hdev->dev,
9225 					"Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9226 					beat,
9227 					le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9228 					(le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9229 						(HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9230 						(HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9231 			rc |= true;
9232 		}
9233 
9234 		dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9235 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9236 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9237 		dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9238 					le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9239 	}
9240 
9241 	return rc;
9242 }
9243 
9244 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9245 			struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9246 {
9247 	struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9248 	u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9249 
9250 	dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9251 
9252 	dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9253 				derr & 0x3, derr & 0xc);
9254 
9255 	/* JIRA H6-3286 - the following prints may not be valid */
9256 	dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9257 	for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9258 		curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9259 		dev_err_ratelimited(hdev->dev,
9260 				"\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9261 				i,
9262 				FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9263 				FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9264 				FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9265 				FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9266 	}
9267 }
9268 
9269 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9270 		struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9271 {
9272 	__le32 *col_cmd = ca_par_err_data->dbg_col;
9273 	__le16 *row_cmd = ca_par_err_data->dbg_row;
9274 	u32 i;
9275 
9276 	dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9277 
9278 	dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9279 	for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9280 		dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9281 			le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9282 			le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9283 }
9284 
9285 /* Returns true if hard reset is needed or false otherwise */
9286 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9287 					struct hl_eq_hbm_sei_data *sei_data)
9288 {
9289 	bool require_hard_reset = false;
9290 	u32 hbm_id, mc_id, cause_idx;
9291 
9292 	hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9293 	mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9294 
9295 	cause_idx = sei_data->hdr.sei_cause;
9296 	if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9297 		gaudi2_print_event(hdev, event_type, true,
9298 			"err cause: %s",
9299 			"Invalid HBM SEI event cause (%d) provided by FW", cause_idx);
9300 		return true;
9301 	}
9302 
9303 	gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9304 		"System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9305 		sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9306 		hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9307 		hbm_mc_sei_cause[cause_idx]);
9308 
9309 	/* Print error-specific info */
9310 	switch (cause_idx) {
9311 	case HBM_SEI_CATTRIP:
9312 		require_hard_reset = true;
9313 		break;
9314 
9315 	case  HBM_SEI_CMD_PARITY_EVEN:
9316 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9317 						le32_to_cpu(sei_data->hdr.cnt));
9318 		require_hard_reset = true;
9319 		break;
9320 
9321 	case  HBM_SEI_CMD_PARITY_ODD:
9322 		gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9323 						le32_to_cpu(sei_data->hdr.cnt));
9324 		require_hard_reset = true;
9325 		break;
9326 
9327 	case HBM_SEI_WRITE_DATA_PARITY_ERR:
9328 		gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9329 						le32_to_cpu(sei_data->hdr.cnt));
9330 		require_hard_reset = true;
9331 		break;
9332 
9333 	case HBM_SEI_READ_ERR:
9334 		/* Unlike other SEI events, read error requires further processing of the
9335 		 * raw data in order to determine the root cause.
9336 		 */
9337 		require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9338 								&sei_data->read_err_info,
9339 								le32_to_cpu(sei_data->hdr.cnt));
9340 		break;
9341 
9342 	default:
9343 		break;
9344 	}
9345 
9346 	require_hard_reset |= !!sei_data->hdr.is_critical;
9347 
9348 	return require_hard_reset;
9349 }
9350 
9351 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9352 				u64 intr_cause_data)
9353 {
9354 	if (intr_cause_data) {
9355 		gaudi2_print_event(hdev, event_type, true,
9356 			"temperature error cause: %#llx", intr_cause_data);
9357 		return 1;
9358 	}
9359 
9360 	return 0;
9361 }
9362 
9363 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9364 {
9365 	u32 i, error_count = 0;
9366 
9367 	for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9368 		if (intr_cause_data & hbm_mc_spi[i].mask) {
9369 			dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9370 				hbm_mc_spi[i].cause);
9371 			error_count++;
9372 		}
9373 
9374 	return error_count;
9375 }
9376 
9377 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9378 {
9379 	ktime_t zero_time = ktime_set(0, 0);
9380 
9381 	mutex_lock(&hdev->clk_throttling.lock);
9382 
9383 	switch (event_type) {
9384 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9385 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9386 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9387 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9388 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9389 		dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9390 		break;
9391 
9392 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9393 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9394 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9395 		dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
9396 		break;
9397 
9398 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9399 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9400 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9401 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9402 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9403 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9404 		dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9405 		break;
9406 
9407 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9408 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9409 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9410 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9411 		dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
9412 		break;
9413 
9414 	default:
9415 		dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9416 		break;
9417 	}
9418 
9419 	mutex_unlock(&hdev->clk_throttling.lock);
9420 }
9421 
9422 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9423 					struct cpucp_pkt_sync_err *sync_err)
9424 {
9425 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9426 
9427 	gaudi2_print_event(hdev, event_type, false,
9428 		"FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9429 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9430 		q->pi, atomic_read(&q->ci));
9431 }
9432 
9433 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9434 {
9435 	u32 p2p_intr, msix_gw_intr, error_count = 0;
9436 
9437 	p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9438 	msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9439 
9440 	if (p2p_intr) {
9441 		gaudi2_print_event(hdev, event_type, true,
9442 			"pcie p2p transaction terminated due to security, req_id(0x%x)",
9443 			RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9444 
9445 		WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9446 		error_count++;
9447 	}
9448 
9449 	if (msix_gw_intr) {
9450 		gaudi2_print_event(hdev, event_type, true,
9451 			"pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9452 			RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9453 
9454 		WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9455 		error_count++;
9456 	}
9457 
9458 	return error_count;
9459 }
9460 
9461 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9462 			struct hl_eq_pcie_drain_ind_data *drain_data)
9463 {
9464 	u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
9465 
9466 	cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9467 	lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
9468 	lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
9469 	hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
9470 	hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
9471 
9472 	if (cause & BIT_ULL(0)) {
9473 		dev_err_ratelimited(hdev->dev,
9474 			"PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
9475 			!!lbw_rd, !!lbw_wr);
9476 		error_count++;
9477 	}
9478 
9479 	if (cause & BIT_ULL(1)) {
9480 		dev_err_ratelimited(hdev->dev,
9481 			"PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
9482 			hbw_rd, hbw_wr);
9483 		error_count++;
9484 	}
9485 
9486 	return error_count;
9487 }
9488 
9489 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9490 {
9491 	u32 error_count = 0;
9492 	int i;
9493 
9494 	for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9495 		if (intr_cause_data & BIT_ULL(i)) {
9496 			dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9497 				gaudi2_psoc_axi_drain_interrupts_cause[i]);
9498 			error_count++;
9499 		}
9500 	}
9501 
9502 	hl_check_for_glbl_errors(hdev);
9503 
9504 	return error_count;
9505 }
9506 
9507 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9508 					struct cpucp_pkt_sync_err *sync_err)
9509 {
9510 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9511 
9512 	gaudi2_print_event(hdev, event_type, false,
9513 		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9514 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9515 }
9516 
9517 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9518 					struct hl_eq_engine_arc_intr_data *data)
9519 {
9520 	struct hl_engine_arc_dccm_queue_full_irq *q;
9521 	u32 intr_type, engine_id;
9522 	u64 payload;
9523 
9524 	intr_type = le32_to_cpu(data->intr_type);
9525 	engine_id = le32_to_cpu(data->engine_id);
9526 	payload = le64_to_cpu(data->payload);
9527 
9528 	switch (intr_type) {
9529 	case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9530 		q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9531 
9532 		gaudi2_print_event(hdev, event_type, true,
9533 				"ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9534 				engine_id, intr_type, q->queue_index);
9535 		return 1;
9536 	default:
9537 		gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9538 		return 0;
9539 	}
9540 }
9541 
9542 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9543 {
9544 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
9545 	bool reset_required = false, is_critical = false;
9546 	u32 index, ctl, reset_flags = 0, error_count = 0;
9547 	u64 event_mask = 0;
9548 	u16 event_type;
9549 
9550 	ctl = le32_to_cpu(eq_entry->hdr.ctl);
9551 	event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9552 
9553 	if (event_type >= GAUDI2_EVENT_SIZE) {
9554 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9555 				event_type, GAUDI2_EVENT_SIZE - 1);
9556 		return;
9557 	}
9558 
9559 	gaudi2->events_stat[event_type]++;
9560 	gaudi2->events_stat_aggregate[event_type]++;
9561 
9562 	switch (event_type) {
9563 	case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9564 		fallthrough;
9565 	case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9566 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9567 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9568 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9569 		is_critical = eq_entry->ecc_data.is_critical;
9570 		error_count++;
9571 		break;
9572 
9573 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9574 		fallthrough;
9575 	case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9576 		fallthrough;
9577 	case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9578 		error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9579 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9580 		break;
9581 
9582 	case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9583 		error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type);
9584 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9585 		break;
9586 
9587 	case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9588 		error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9589 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9590 		event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9591 		break;
9592 
9593 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9594 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9595 		error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9596 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9597 		break;
9598 
9599 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9600 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9601 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9602 		error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9603 					&eq_entry->razwi_with_intr_cause, &event_mask);
9604 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9605 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9606 		break;
9607 
9608 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9609 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9610 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9611 						&eq_entry->razwi_with_intr_cause, &event_mask);
9612 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9613 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9614 		break;
9615 
9616 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9617 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9618 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9619 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9620 		break;
9621 
9622 	case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9623 	case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9624 	case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9625 	case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9626 	case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9627 	case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9628 	case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9629 	case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9630 	case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9631 	case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9632 	case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9633 	case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9634 	case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9635 	case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9636 	case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9637 	case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9638 	case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9639 	case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9640 	case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9641 	case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9642 	case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9643 	case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9644 	case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9645 	case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9646 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9647 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9648 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9649 		error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9650 					&eq_entry->razwi_with_intr_cause, &event_mask);
9651 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9652 		break;
9653 
9654 	case GAUDI2_EVENT_DEC0_SPI:
9655 	case GAUDI2_EVENT_DEC1_SPI:
9656 	case GAUDI2_EVENT_DEC2_SPI:
9657 	case GAUDI2_EVENT_DEC3_SPI:
9658 	case GAUDI2_EVENT_DEC4_SPI:
9659 	case GAUDI2_EVENT_DEC5_SPI:
9660 	case GAUDI2_EVENT_DEC6_SPI:
9661 	case GAUDI2_EVENT_DEC7_SPI:
9662 	case GAUDI2_EVENT_DEC8_SPI:
9663 	case GAUDI2_EVENT_DEC9_SPI:
9664 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9665 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9666 		error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9667 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9668 		break;
9669 
9670 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9671 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9672 	case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9673 	case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9674 		index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9675 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9676 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9677 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9678 		error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9679 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9680 		break;
9681 
9682 	case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9683 	case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9684 	case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9685 	case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9686 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9687 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9688 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9689 		error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9690 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9691 		break;
9692 
9693 	case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9694 	case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9695 	case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9696 	case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9697 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9698 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9699 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9700 		error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9701 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9702 		break;
9703 
9704 	case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9705 	case GAUDI2_EVENT_KDMA0_CORE:
9706 		error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9707 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9708 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9709 		break;
9710 
9711 	case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9712 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9713 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9714 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9715 		break;
9716 
9717 	case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9718 		error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9719 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9720 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9721 		break;
9722 
9723 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9724 		error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9725 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9726 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9727 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9728 		break;
9729 
9730 	case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9731 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9732 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9733 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9734 		error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9735 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9736 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9737 		break;
9738 
9739 	case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9740 		error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9741 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9742 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9743 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9744 		break;
9745 
9746 	case GAUDI2_EVENT_PMMU_FATAL_0:
9747 		error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9748 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9749 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9750 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9751 		break;
9752 
9753 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9754 		error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9755 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9756 		break;
9757 
9758 	case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9759 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9760 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9761 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9762 			reset_required = true;
9763 		}
9764 		error_count++;
9765 		break;
9766 
9767 	case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
9768 		error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
9769 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9770 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9771 		break;
9772 
9773 	case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
9774 		error_count = gaudi2_handle_hbm_mc_spi(hdev,
9775 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9776 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9777 		break;
9778 
9779 	case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
9780 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
9781 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9782 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9783 		break;
9784 
9785 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
9786 		error_count = gaudi2_handle_psoc_drain(hdev,
9787 				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9788 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9789 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9790 		break;
9791 
9792 	case GAUDI2_EVENT_CPU_AXI_ECC:
9793 		error_count = GAUDI2_NA_EVENT_CAUSE;
9794 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9795 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9796 		break;
9797 	case GAUDI2_EVENT_CPU_L2_RAM_ECC:
9798 		error_count = GAUDI2_NA_EVENT_CAUSE;
9799 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9800 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9801 		break;
9802 	case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
9803 	case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
9804 	case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
9805 	case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
9806 		error_count = gaudi2_handle_mme_sbte_err(hdev, event_type,
9807 						le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9808 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9809 		break;
9810 	case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
9811 		error_count = GAUDI2_NA_EVENT_CAUSE;
9812 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9813 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9814 		break;
9815 	case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
9816 		error_count = GAUDI2_NA_EVENT_CAUSE;
9817 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9818 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9819 		break;
9820 	case GAUDI2_EVENT_PSOC_PRSTN_FALL:
9821 		error_count = GAUDI2_NA_EVENT_CAUSE;
9822 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9823 		break;
9824 	case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
9825 		error_count = GAUDI2_NA_EVENT_CAUSE;
9826 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9827 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9828 		break;
9829 	case GAUDI2_EVENT_PCIE_FATAL_ERR:
9830 		error_count = GAUDI2_NA_EVENT_CAUSE;
9831 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9832 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9833 		break;
9834 	case GAUDI2_EVENT_TPC0_BMON_SPMU:
9835 	case GAUDI2_EVENT_TPC1_BMON_SPMU:
9836 	case GAUDI2_EVENT_TPC2_BMON_SPMU:
9837 	case GAUDI2_EVENT_TPC3_BMON_SPMU:
9838 	case GAUDI2_EVENT_TPC4_BMON_SPMU:
9839 	case GAUDI2_EVENT_TPC5_BMON_SPMU:
9840 	case GAUDI2_EVENT_TPC6_BMON_SPMU:
9841 	case GAUDI2_EVENT_TPC7_BMON_SPMU:
9842 	case GAUDI2_EVENT_TPC8_BMON_SPMU:
9843 	case GAUDI2_EVENT_TPC9_BMON_SPMU:
9844 	case GAUDI2_EVENT_TPC10_BMON_SPMU:
9845 	case GAUDI2_EVENT_TPC11_BMON_SPMU:
9846 	case GAUDI2_EVENT_TPC12_BMON_SPMU:
9847 	case GAUDI2_EVENT_TPC13_BMON_SPMU:
9848 	case GAUDI2_EVENT_TPC14_BMON_SPMU:
9849 	case GAUDI2_EVENT_TPC15_BMON_SPMU:
9850 	case GAUDI2_EVENT_TPC16_BMON_SPMU:
9851 	case GAUDI2_EVENT_TPC17_BMON_SPMU:
9852 	case GAUDI2_EVENT_TPC18_BMON_SPMU:
9853 	case GAUDI2_EVENT_TPC19_BMON_SPMU:
9854 	case GAUDI2_EVENT_TPC20_BMON_SPMU:
9855 	case GAUDI2_EVENT_TPC21_BMON_SPMU:
9856 	case GAUDI2_EVENT_TPC22_BMON_SPMU:
9857 	case GAUDI2_EVENT_TPC23_BMON_SPMU:
9858 	case GAUDI2_EVENT_TPC24_BMON_SPMU:
9859 	case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
9860 	case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
9861 	case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
9862 	case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
9863 	case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
9864 	case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
9865 	case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
9866 	case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
9867 	case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
9868 	case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
9869 	case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
9870 	case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
9871 	case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
9872 		fallthrough;
9873 	case GAUDI2_EVENT_DEC0_BMON_SPMU:
9874 	case GAUDI2_EVENT_DEC1_BMON_SPMU:
9875 	case GAUDI2_EVENT_DEC2_BMON_SPMU:
9876 	case GAUDI2_EVENT_DEC3_BMON_SPMU:
9877 	case GAUDI2_EVENT_DEC4_BMON_SPMU:
9878 	case GAUDI2_EVENT_DEC5_BMON_SPMU:
9879 	case GAUDI2_EVENT_DEC6_BMON_SPMU:
9880 	case GAUDI2_EVENT_DEC7_BMON_SPMU:
9881 	case GAUDI2_EVENT_DEC8_BMON_SPMU:
9882 	case GAUDI2_EVENT_DEC9_BMON_SPMU:
9883 	case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
9884 		error_count = GAUDI2_NA_EVENT_CAUSE;
9885 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9886 		break;
9887 
9888 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9889 	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9890 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9891 	case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9892 		gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
9893 		error_count = GAUDI2_NA_EVENT_CAUSE;
9894 		break;
9895 
9896 	case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
9897 		gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
9898 		error_count = GAUDI2_NA_EVENT_CAUSE;
9899 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9900 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9901 		break;
9902 
9903 	case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
9904 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9905 		error_count = GAUDI2_NA_EVENT_CAUSE;
9906 		/* Do nothing- FW will handle it */
9907 		break;
9908 
9909 	case GAUDI2_EVENT_PCIE_P2P_MSIX:
9910 		error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
9911 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9912 		break;
9913 
9914 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
9915 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
9916 		error_count = gaudi2_handle_sm_err(hdev, event_type, index);
9917 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9918 		break;
9919 
9920 	case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
9921 		error_count = GAUDI2_NA_EVENT_CAUSE;
9922 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9923 		break;
9924 
9925 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
9926 		dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
9927 						le64_to_cpu(eq_entry->data[0]));
9928 		error_count = GAUDI2_NA_EVENT_CAUSE;
9929 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9930 		break;
9931 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
9932 		dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
9933 						le64_to_cpu(eq_entry->data[0]));
9934 		error_count = GAUDI2_NA_EVENT_CAUSE;
9935 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9936 		break;
9937 
9938 	case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
9939 		gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
9940 		error_count = GAUDI2_NA_EVENT_CAUSE;
9941 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9942 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9943 		break;
9944 
9945 	case GAUDI2_EVENT_ARC_DCCM_FULL:
9946 		error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
9947 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9948 		break;
9949 
9950 	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
9951 	case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
9952 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9953 		error_count = GAUDI2_NA_EVENT_CAUSE;
9954 		is_critical = true;
9955 		break;
9956 
9957 	default:
9958 		if (gaudi2_irq_map_table[event_type].valid) {
9959 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
9960 						event_type);
9961 			error_count = GAUDI2_NA_EVENT_CAUSE;
9962 		}
9963 	}
9964 
9965 	/* Make sure to dump an error in case no error cause was printed so far.
9966 	 * Note that although we have counted the errors, we use this number as
9967 	 * a boolean.
9968 	 */
9969 	if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
9970 		gaudi2_print_event(hdev, event_type, true, "%d", event_type);
9971 	else if (error_count == 0)
9972 		gaudi2_print_event(hdev, event_type, true,
9973 				"No error cause for H/W event %u", event_type);
9974 
9975 	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
9976 				reset_required) {
9977 		if (reset_required ||
9978 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
9979 			reset_flags |= HL_DRV_RESET_HARD;
9980 
9981 		if (hdev->hard_reset_on_fw_events ||
9982 				(hdev->asic_prop.fw_security_enabled && is_critical))
9983 			goto reset_device;
9984 	}
9985 
9986 	/* Send unmask irq only for interrupts not classified as MSG */
9987 	if (!gaudi2_irq_map_table[event_type].msg)
9988 		hl_fw_unmask_irq(hdev, event_type);
9989 
9990 	if (event_mask)
9991 		hl_notifier_event_send_all(hdev, event_mask);
9992 
9993 	return;
9994 
9995 reset_device:
9996 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
9997 		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
9998 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
9999 	} else {
10000 		reset_flags |= HL_DRV_RESET_DELAY;
10001 	}
10002 	/* escalate general hw errors to critical/fatal error */
10003 	if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10004 		hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10005 
10006 	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10007 	hl_device_cond_reset(hdev, reset_flags, event_mask);
10008 }
10009 
10010 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10011 			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
10012 			u32 hw_queue_id, u32 size, u64 addr, u32 val)
10013 {
10014 	u32 ctl, pkt_size;
10015 	int rc = 0;
10016 
10017 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10018 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10019 	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10020 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10021 
10022 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
10023 	lin_dma_pkt->src_addr = cpu_to_le64(val);
10024 	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10025 	lin_dma_pkt->tsize = cpu_to_le32(size);
10026 
10027 	pkt_size = sizeof(struct packet_lin_dma);
10028 
10029 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
10030 	if (rc)
10031 		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
10032 				hw_queue_id);
10033 
10034 	return rc;
10035 }
10036 
10037 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10038 {
10039 	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10040 					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10041 					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10042 					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10043 	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10044 		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
10045 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10046 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10047 	void *lin_dma_pkts_arr;
10048 	dma_addr_t pkt_dma_addr;
10049 	int rc = 0, dma_num = 0;
10050 
10051 	if (prop->edma_enabled_mask == 0) {
10052 		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
10053 		return -EIO;
10054 	}
10055 
10056 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10057 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10058 	comp_addr = CFG_BASE + sob_addr;
10059 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10060 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10061 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10062 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10063 
10064 	/* Calculate how many lin dma pkts we'll need */
10065 	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10066 	pkt_size = sizeof(struct packet_lin_dma);
10067 
10068 	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
10069 					&pkt_dma_addr, GFP_KERNEL);
10070 	if (!lin_dma_pkts_arr)
10071 		return -ENOMEM;
10072 
10073 	/*
10074 	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
10075 	 * only the first one to restore later
10076 	 * also set the sob addr for all edma cores for completion.
10077 	 * set QM as trusted to allow it to access physical address with MMU bp.
10078 	 */
10079 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10080 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10081 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10082 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10083 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10084 
10085 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10086 				continue;
10087 
10088 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10089 					edma_offset, mmubp);
10090 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10091 					lower_32_bits(comp_addr));
10092 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10093 					upper_32_bits(comp_addr));
10094 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10095 					comp_val);
10096 			gaudi2_qman_set_test_mode(hdev,
10097 					edma_queues_id[dcore] + 4 * edma_idx, true);
10098 		}
10099 	}
10100 
10101 	WREG32(sob_addr, 0);
10102 
10103 	while (cur_addr < end_addr) {
10104 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10105 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10106 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10107 
10108 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10109 					continue;
10110 
10111 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10112 
10113 				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10114 					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10115 					pkt_dma_addr + dma_num * pkt_size,
10116 					edma_queues_id[dcore] + edma_idx * 4,
10117 					chunk_size, cur_addr, val);
10118 				if (rc)
10119 					goto end;
10120 
10121 				dma_num++;
10122 				cur_addr += chunk_size;
10123 				if (cur_addr == end_addr)
10124 					break;
10125 			}
10126 		}
10127 	}
10128 
10129 	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10130 	if (rc) {
10131 		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
10132 		goto end;
10133 	}
10134 end:
10135 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10136 		for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10137 			u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10138 			u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10139 
10140 			if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10141 				continue;
10142 
10143 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10144 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10145 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10146 			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10147 			gaudi2_qman_set_test_mode(hdev,
10148 					edma_queues_id[dcore] + 4 * edma_idx, false);
10149 		}
10150 	}
10151 
10152 	WREG32(sob_addr, 0);
10153 	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
10154 
10155 	return rc;
10156 }
10157 
10158 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10159 {
10160 	int rc;
10161 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10162 	u64 size = prop->dram_end_address - prop->dram_user_base_address;
10163 
10164 	rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10165 
10166 	if (rc)
10167 		dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10168 				prop->dram_user_base_address, size);
10169 	return rc;
10170 }
10171 
10172 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10173 {
10174 	int rc;
10175 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10176 	u64 val = hdev->memory_scrub_val;
10177 	u64 addr, size;
10178 
10179 	if (!hdev->memory_scrub)
10180 		return 0;
10181 
10182 	/* scrub SRAM */
10183 	addr = prop->sram_user_base_address;
10184 	size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10185 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10186 			addr, addr + size, val);
10187 	rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10188 	if (rc) {
10189 		dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10190 		return rc;
10191 	}
10192 
10193 	/* scrub DRAM */
10194 	rc = gaudi2_scrub_device_dram(hdev, val);
10195 	if (rc) {
10196 		dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10197 		return rc;
10198 	}
10199 	return 0;
10200 }
10201 
10202 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10203 {
10204 	u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10205 		cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10206 	u32 val, size, offset;
10207 	int dcore_id;
10208 
10209 	offset = hdev->asic_prop.first_available_cq[0] * 4;
10210 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10211 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10212 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10213 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10214 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10215 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10216 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10217 			(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10218 
10219 	/* memset dcore0 CQ registers */
10220 	gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10221 	gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10222 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10223 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10224 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10225 	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10226 
10227 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10228 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10229 	cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10230 	cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10231 	cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10232 	cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10233 	size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10234 
10235 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10236 		gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10237 		gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10238 		gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10239 		gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10240 		gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10241 		gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10242 
10243 		cq_lbw_l_addr += DCORE_OFFSET;
10244 		cq_lbw_h_addr += DCORE_OFFSET;
10245 		cq_lbw_data_addr += DCORE_OFFSET;
10246 		cq_base_l_addr += DCORE_OFFSET;
10247 		cq_base_h_addr += DCORE_OFFSET;
10248 		cq_size_addr += DCORE_OFFSET;
10249 	}
10250 
10251 	offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10252 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10253 	val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10254 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10255 
10256 	/* memset dcore0 monitors */
10257 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10258 
10259 	addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10260 	gaudi2_memset_device_lbw(hdev, addr, size, 0);
10261 
10262 	mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10263 	mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10264 	size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10265 
10266 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10267 		gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10268 		gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10269 		mon_sts_addr += DCORE_OFFSET;
10270 		mon_cfg_addr += DCORE_OFFSET;
10271 	}
10272 
10273 	offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10274 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10275 	val = 0;
10276 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10277 			(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10278 
10279 	/* memset dcore0 sobs */
10280 	gaudi2_memset_device_lbw(hdev, addr, size, val);
10281 
10282 	addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10283 	size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10284 
10285 	for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10286 		gaudi2_memset_device_lbw(hdev, addr, size, val);
10287 		addr += DCORE_OFFSET;
10288 	}
10289 
10290 	/* Flush all WREG to prevent race */
10291 	val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10292 }
10293 
10294 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10295 {
10296 	u32 reg_base, hw_queue_id;
10297 
10298 	for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10299 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10300 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10301 			continue;
10302 
10303 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10304 
10305 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10306 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10307 	}
10308 
10309 	/* Flush all WREG to prevent race */
10310 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10311 }
10312 
10313 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10314 {
10315 	u32 reg_base, hw_queue_id;
10316 
10317 	for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10318 							hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10319 		if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10320 			continue;
10321 
10322 		gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10323 
10324 		reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10325 		WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10326 	}
10327 
10328 	/* Flush all WREG to prevent race */
10329 	RREG32(mmPDMA0_QM_ARB_CFG_0);
10330 }
10331 
10332 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10333 {
10334 	return 0;
10335 }
10336 
10337 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10338 {
10339 }
10340 
10341 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10342 						struct dup_block_ctx *cfg_ctx)
10343 {
10344 	u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10345 	u8 seq;
10346 	int i;
10347 
10348 	for (i = 0 ; i < cfg_ctx->instances ; i++) {
10349 		seq = block_idx * cfg_ctx->instances + i;
10350 
10351 		/* skip disabled instance */
10352 		if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10353 			continue;
10354 
10355 		cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10356 					cfg_ctx->data);
10357 	}
10358 }
10359 
10360 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10361 						u64 mask)
10362 {
10363 	int i;
10364 
10365 	cfg_ctx->enabled_mask = mask;
10366 
10367 	for (i = 0 ; i < cfg_ctx->blocks ; i++)
10368 		gaudi2_init_block_instances(hdev, i, cfg_ctx);
10369 }
10370 
10371 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10372 {
10373 	gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10374 }
10375 
10376 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10377 {
10378 	void *host_mem_virtual_addr;
10379 	dma_addr_t host_mem_dma_addr;
10380 	u64 reserved_va_base;
10381 	u32 pos, size_left, size_to_dma;
10382 	struct hl_ctx *ctx;
10383 	int rc = 0;
10384 
10385 	/* Fetch the ctx */
10386 	ctx = hl_get_compute_ctx(hdev);
10387 	if (!ctx) {
10388 		dev_err(hdev->dev, "No ctx available\n");
10389 		return -EINVAL;
10390 	}
10391 
10392 	/* Allocate buffers for read and for poll */
10393 	host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10394 								GFP_KERNEL | __GFP_ZERO);
10395 	if (host_mem_virtual_addr == NULL) {
10396 		dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10397 		rc = -ENOMEM;
10398 		goto put_ctx;
10399 	}
10400 
10401 	/* Reserve VM region on asic side */
10402 	reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10403 						HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10404 	if (!reserved_va_base) {
10405 		dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10406 		rc = -ENOMEM;
10407 		goto free_data_buffer;
10408 	}
10409 
10410 	/* Create mapping on asic side */
10411 	mutex_lock(&hdev->mmu_lock);
10412 
10413 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10414 	if (rc) {
10415 		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10416 		goto unreserve_va;
10417 	}
10418 
10419 	rc = hl_mmu_invalidate_cache_range(hdev, false,
10420 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10421 				      ctx->asid, reserved_va_base, SZ_2M);
10422 	if (rc) {
10423 		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10424 		goto unreserve_va;
10425 	}
10426 
10427 	mutex_unlock(&hdev->mmu_lock);
10428 
10429 	/* Enable MMU on KDMA */
10430 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10431 
10432 	pos = 0;
10433 	size_left = size;
10434 	size_to_dma = SZ_2M;
10435 
10436 	while (size_left > 0) {
10437 		if (size_left < SZ_2M)
10438 			size_to_dma = size_left;
10439 
10440 		rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10441 		if (rc)
10442 			break;
10443 
10444 		memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10445 
10446 		if (size_left <= SZ_2M)
10447 			break;
10448 
10449 		pos += SZ_2M;
10450 		addr += SZ_2M;
10451 		size_left -= SZ_2M;
10452 	}
10453 
10454 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10455 
10456 	mutex_lock(&hdev->mmu_lock);
10457 
10458 	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10459 	if (rc)
10460 		goto unreserve_va;
10461 
10462 	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10463 				      ctx->asid, reserved_va_base, SZ_2M);
10464 
10465 unreserve_va:
10466 	mutex_unlock(&hdev->mmu_lock);
10467 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10468 free_data_buffer:
10469 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10470 put_ctx:
10471 	hl_ctx_put(ctx);
10472 
10473 	return rc;
10474 }
10475 
10476 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10477 {
10478 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10479 	int min_alloc_order, rc;
10480 
10481 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10482 		return 0;
10483 
10484 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10485 								HOST_SPACE_INTERNAL_CB_SZ,
10486 								&hdev->internal_cb_pool_dma_addr,
10487 								GFP_KERNEL | __GFP_ZERO);
10488 
10489 	if (!hdev->internal_cb_pool_virt_addr)
10490 		return -ENOMEM;
10491 
10492 	min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10493 					gaudi2_get_wait_cb_size(hdev)));
10494 
10495 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10496 	if (!hdev->internal_cb_pool) {
10497 		dev_err(hdev->dev, "Failed to create internal CB pool\n");
10498 		rc = -ENOMEM;
10499 		goto free_internal_cb_pool;
10500 	}
10501 
10502 	rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10503 				HOST_SPACE_INTERNAL_CB_SZ, -1);
10504 	if (rc) {
10505 		dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10506 		rc = -EFAULT;
10507 		goto destroy_internal_cb_pool;
10508 	}
10509 
10510 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10511 					HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10512 
10513 	if (!hdev->internal_cb_va_base) {
10514 		rc = -ENOMEM;
10515 		goto destroy_internal_cb_pool;
10516 	}
10517 
10518 	mutex_lock(&hdev->mmu_lock);
10519 
10520 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10521 					HOST_SPACE_INTERNAL_CB_SZ);
10522 	if (rc)
10523 		goto unreserve_internal_cb_pool;
10524 
10525 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10526 	if (rc)
10527 		goto unmap_internal_cb_pool;
10528 
10529 	mutex_unlock(&hdev->mmu_lock);
10530 
10531 	return 0;
10532 
10533 unmap_internal_cb_pool:
10534 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10535 unreserve_internal_cb_pool:
10536 	mutex_unlock(&hdev->mmu_lock);
10537 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10538 destroy_internal_cb_pool:
10539 	gen_pool_destroy(hdev->internal_cb_pool);
10540 free_internal_cb_pool:
10541 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10542 					hdev->internal_cb_pool_dma_addr);
10543 
10544 	return rc;
10545 }
10546 
10547 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10548 {
10549 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10550 
10551 	if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10552 		return;
10553 
10554 	mutex_lock(&hdev->mmu_lock);
10555 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10556 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10557 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10558 	mutex_unlock(&hdev->mmu_lock);
10559 
10560 	gen_pool_destroy(hdev->internal_cb_pool);
10561 
10562 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10563 					hdev->internal_cb_pool_dma_addr);
10564 }
10565 
10566 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10567 {
10568 	gaudi2_restore_user_sm_registers(hdev);
10569 	gaudi2_restore_user_qm_registers(hdev);
10570 }
10571 
10572 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10573 {
10574 	struct hl_device *hdev = ctx->hdev;
10575 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10576 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10577 	int rc;
10578 
10579 	rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10580 				gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10581 	if (rc)
10582 		dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10583 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10584 
10585 	return rc;
10586 }
10587 
10588 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10589 {
10590 	struct hl_device *hdev = ctx->hdev;
10591 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10592 	int rc;
10593 
10594 	rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10595 				prop->pmmu.page_size, true);
10596 	if (rc)
10597 		dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10598 			RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10599 }
10600 
10601 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10602 {
10603 	int rc;
10604 
10605 	rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10606 	if (rc)
10607 		return rc;
10608 
10609 	/* No need to clear user registers if the device has just
10610 	 * performed reset, we restore only nic qm registers
10611 	 */
10612 	if (ctx->hdev->reset_upon_device_release)
10613 		gaudi2_restore_nic_qm_registers(ctx->hdev);
10614 	else
10615 		gaudi2_restore_user_registers(ctx->hdev);
10616 
10617 	rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10618 	if (rc)
10619 		return rc;
10620 
10621 	rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10622 	if (rc)
10623 		gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10624 
10625 	return rc;
10626 }
10627 
10628 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10629 {
10630 	if (ctx->asid == HL_KERNEL_ASID_ID)
10631 		return;
10632 
10633 	gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10634 
10635 	gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10636 }
10637 
10638 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10639 {
10640 	struct hl_device *hdev = cs->ctx->hdev;
10641 	int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10642 	u32 mon_payload, sob_id, mon_id;
10643 
10644 	if (!cs_needs_completion(cs))
10645 		return 0;
10646 
10647 	/*
10648 	 * First 64 SOB/MON are reserved for driver for QMAN auto completion
10649 	 * mechanism. Each SOB/MON pair are used for a pending CS with the same
10650 	 * cyclic index. The SOB value is increased when each of the CS jobs is
10651 	 * completed. When the SOB reaches the number of CS jobs, the monitor
10652 	 * generates MSI-X interrupt.
10653 	 */
10654 
10655 	sob_id = mon_id = index;
10656 	mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10657 				(1 << CQ_ENTRY_READY_SHIFT) | index;
10658 
10659 	gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10660 				cs->jobs_cnt);
10661 
10662 	return 0;
10663 }
10664 
10665 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10666 {
10667 	return HL_INVALID_QUEUE;
10668 }
10669 
10670 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10671 {
10672 	struct hl_cb *cb = data;
10673 	struct packet_msg_short *pkt;
10674 	u32 value, ctl, pkt_size = sizeof(*pkt);
10675 
10676 	pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10677 	memset(pkt, 0, pkt_size);
10678 
10679 	/* Inc by 1, Mode ADD */
10680 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10681 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10682 
10683 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10684 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10685 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10686 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10687 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10688 
10689 	pkt->value = cpu_to_le32(value);
10690 	pkt->ctl = cpu_to_le32(ctl);
10691 
10692 	return size + pkt_size;
10693 }
10694 
10695 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10696 {
10697 	u32 ctl, pkt_size = sizeof(*pkt);
10698 
10699 	memset(pkt, 0, pkt_size);
10700 
10701 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10702 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0);  /* MON base */
10703 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10704 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10705 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10706 
10707 	pkt->value = cpu_to_le32(value);
10708 	pkt->ctl = cpu_to_le32(ctl);
10709 
10710 	return pkt_size;
10711 }
10712 
10713 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
10714 					u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
10715 {
10716 	u32 ctl, value, pkt_size = sizeof(*pkt);
10717 	u8 mask;
10718 
10719 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
10720 		dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
10721 		return 0;
10722 	}
10723 
10724 	memset(pkt, 0, pkt_size);
10725 
10726 	value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
10727 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
10728 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL*/
10729 	value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
10730 
10731 	ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10732 	ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10733 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10734 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10735 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10736 
10737 	pkt->value = cpu_to_le32(value);
10738 	pkt->ctl = cpu_to_le32(ctl);
10739 
10740 	return pkt_size;
10741 }
10742 
10743 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
10744 {
10745 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
10746 
10747 	memset(pkt, 0, pkt_size);
10748 
10749 	cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
10750 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
10751 	cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
10752 
10753 	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
10754 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10755 	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10756 
10757 	pkt->cfg = cpu_to_le32(cfg);
10758 	pkt->ctl = cpu_to_le32(ctl);
10759 
10760 	return pkt_size;
10761 }
10762 
10763 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
10764 {
10765 	struct hl_cb *cb = prop->data;
10766 	void *buf = (void *) (uintptr_t) (cb->kernel_address);
10767 
10768 	u64 monitor_base, fence_addr = 0;
10769 	u32 stream_index, size = prop->size;
10770 	u16 msg_addr_offset;
10771 
10772 	stream_index = prop->q_idx % 4;
10773 	fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
10774 			QM_FENCE2_OFFSET + stream_index * 4;
10775 
10776 	/*
10777 	 * monitor_base should be the content of the base0 address registers,
10778 	 * so it will be added to the msg short offsets
10779 	 */
10780 	monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
10781 
10782 	/* First monitor config packet: low address of the sync */
10783 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
10784 				monitor_base;
10785 
10786 	size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
10787 
10788 	/* Second monitor config packet: high address of the sync */
10789 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
10790 				monitor_base;
10791 
10792 	size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
10793 
10794 	/*
10795 	 * Third monitor config packet: the payload, i.e. what to write when the
10796 	 * sync triggers
10797 	 */
10798 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
10799 				monitor_base;
10800 
10801 	size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
10802 
10803 	/* Fourth monitor config packet: bind the monitor to a sync object */
10804 	msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
10805 
10806 	size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
10807 						prop->sob_val, msg_addr_offset);
10808 
10809 	/* Fence packet */
10810 	size += gaudi2_add_fence_pkt(buf + size);
10811 
10812 	return size;
10813 }
10814 
10815 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
10816 {
10817 	struct hl_hw_sob *hw_sob = data;
10818 
10819 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
10820 
10821 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
10822 
10823 	kref_init(&hw_sob->kref);
10824 }
10825 
10826 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
10827 {
10828 }
10829 
10830 static u64 gaudi2_get_device_time(struct hl_device *hdev)
10831 {
10832 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
10833 
10834 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
10835 }
10836 
10837 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
10838 {
10839 	return 0;
10840 }
10841 
10842 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
10843 					struct hl_cs *cs, u32 wait_queue_id,
10844 					u32 collective_engine_id, u32 encaps_signal_offset)
10845 {
10846 	return -EINVAL;
10847 }
10848 
10849 /*
10850  * hl_mmu_scramble - converts a dram (non power of 2) page-size aligned address
10851  *                   to DMMU page-size address (64MB) before mapping it in
10852  *                   the MMU.
10853  * The operation is performed on both the virtual and physical addresses.
10854  * for device with 6 HBMs the scramble is:
10855  * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
10856  *
10857  * Example:
10858  * =============================================================================
10859  * Allocated DRAM  Reserved VA      scrambled VA for MMU mapping    Scrambled PA
10860  * Phys address                                                     in MMU last
10861  *                                                                    HOP
10862  * =============================================================================
10863  * PA1 0x3000000  VA1 0x9C000000  SVA1= (VA1/48M)*64M 0xD0000000  <- PA1/48M 0x1
10864  * PA2 0x9000000  VA2 0x9F000000  SVA2= (VA2/48M)*64M 0xD4000000  <- PA2/48M 0x3
10865  * =============================================================================
10866  */
10867 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
10868 {
10869 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10870 	u32 divisor, mod_va;
10871 	u64 div_va;
10872 
10873 	/* accept any address in the DRAM address space */
10874 	if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
10875 									VA_HBM_SPACE_END)) {
10876 
10877 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10878 		div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
10879 		return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
10880 			(div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
10881 			(mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
10882 	}
10883 
10884 	return raw_addr;
10885 }
10886 
10887 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
10888 {
10889 	struct asic_fixed_properties *prop = &hdev->asic_prop;
10890 	u32 divisor, mod_va;
10891 	u64 div_va;
10892 
10893 	/* accept any address in the DRAM address space */
10894 	if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
10895 									VA_HBM_SPACE_END)) {
10896 
10897 		divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
10898 		div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
10899 					PAGE_SIZE_64MB, &mod_va);
10900 
10901 		return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
10902 					(div_va * divisor + mod_va));
10903 	}
10904 
10905 	return scrambled_addr;
10906 }
10907 
10908 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
10909 {
10910 	u32 base = 0, dcore_id, dec_id;
10911 
10912 	if (core_id >= NUMBER_OF_DEC) {
10913 		dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
10914 		goto out;
10915 	}
10916 
10917 	if (core_id < 8) {
10918 		dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
10919 		dec_id = core_id % NUM_OF_DEC_PER_DCORE;
10920 
10921 		base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
10922 				dec_id * DCORE_VDEC_OFFSET;
10923 	} else {
10924 		/* PCIe Shared Decoder */
10925 		base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
10926 	}
10927 out:
10928 	return base;
10929 }
10930 
10931 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
10932 				u32 *block_size, u32 *block_id)
10933 {
10934 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10935 	int i;
10936 
10937 	for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
10938 		if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
10939 			*block_id = i;
10940 			if (block_size)
10941 				*block_size = gaudi2->mapped_blocks[i].size;
10942 			return 0;
10943 		}
10944 	}
10945 
10946 	dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
10947 
10948 	return -EINVAL;
10949 }
10950 
10951 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
10952 			u32 block_id, u32 block_size)
10953 {
10954 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10955 	u64 offset_in_bar;
10956 	u64 address;
10957 	int rc;
10958 
10959 	if (block_id >= NUM_USER_MAPPED_BLOCKS) {
10960 		dev_err(hdev->dev, "Invalid block id %u", block_id);
10961 		return -EINVAL;
10962 	}
10963 
10964 	/* we allow mapping only an entire block */
10965 	if (block_size != gaudi2->mapped_blocks[block_id].size) {
10966 		dev_err(hdev->dev, "Invalid block size %u", block_size);
10967 		return -EINVAL;
10968 	}
10969 
10970 	offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
10971 
10972 	address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
10973 
10974 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
10975 			VM_DONTCOPY | VM_NORESERVE);
10976 
10977 	rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
10978 			block_size, vma->vm_page_prot);
10979 	if (rc)
10980 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
10981 
10982 	return rc;
10983 }
10984 
10985 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
10986 {
10987 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
10988 
10989 	struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
10990 	u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
10991 
10992 	if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
10993 		WREG32(irq_handler_offset,
10994 			gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
10995 }
10996 
10997 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
10998 {
10999 	switch (mmu_id) {
11000 	case HW_CAP_DCORE0_DMMU0:
11001 		*mmu_base = mmDCORE0_HMMU0_MMU_BASE;
11002 		break;
11003 	case HW_CAP_DCORE0_DMMU1:
11004 		*mmu_base = mmDCORE0_HMMU1_MMU_BASE;
11005 		break;
11006 	case HW_CAP_DCORE0_DMMU2:
11007 		*mmu_base = mmDCORE0_HMMU2_MMU_BASE;
11008 		break;
11009 	case HW_CAP_DCORE0_DMMU3:
11010 		*mmu_base = mmDCORE0_HMMU3_MMU_BASE;
11011 		break;
11012 	case HW_CAP_DCORE1_DMMU0:
11013 		*mmu_base = mmDCORE1_HMMU0_MMU_BASE;
11014 		break;
11015 	case HW_CAP_DCORE1_DMMU1:
11016 		*mmu_base = mmDCORE1_HMMU1_MMU_BASE;
11017 		break;
11018 	case HW_CAP_DCORE1_DMMU2:
11019 		*mmu_base = mmDCORE1_HMMU2_MMU_BASE;
11020 		break;
11021 	case HW_CAP_DCORE1_DMMU3:
11022 		*mmu_base = mmDCORE1_HMMU3_MMU_BASE;
11023 		break;
11024 	case HW_CAP_DCORE2_DMMU0:
11025 		*mmu_base = mmDCORE2_HMMU0_MMU_BASE;
11026 		break;
11027 	case HW_CAP_DCORE2_DMMU1:
11028 		*mmu_base = mmDCORE2_HMMU1_MMU_BASE;
11029 		break;
11030 	case HW_CAP_DCORE2_DMMU2:
11031 		*mmu_base = mmDCORE2_HMMU2_MMU_BASE;
11032 		break;
11033 	case HW_CAP_DCORE2_DMMU3:
11034 		*mmu_base = mmDCORE2_HMMU3_MMU_BASE;
11035 		break;
11036 	case HW_CAP_DCORE3_DMMU0:
11037 		*mmu_base = mmDCORE3_HMMU0_MMU_BASE;
11038 		break;
11039 	case HW_CAP_DCORE3_DMMU1:
11040 		*mmu_base = mmDCORE3_HMMU1_MMU_BASE;
11041 		break;
11042 	case HW_CAP_DCORE3_DMMU2:
11043 		*mmu_base = mmDCORE3_HMMU2_MMU_BASE;
11044 		break;
11045 	case HW_CAP_DCORE3_DMMU3:
11046 		*mmu_base = mmDCORE3_HMMU3_MMU_BASE;
11047 		break;
11048 	case HW_CAP_PMMU:
11049 		*mmu_base = mmPMMU_HBW_MMU_BASE;
11050 		break;
11051 	default:
11052 		return -EINVAL;
11053 	}
11054 
11055 	return 0;
11056 }
11057 
11058 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
11059 {
11060 	bool is_pmmu = (mmu_id == HW_CAP_PMMU);
11061 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11062 	u32 mmu_base;
11063 
11064 	if (!(gaudi2->hw_cap_initialized & mmu_id))
11065 		return;
11066 
11067 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
11068 		return;
11069 
11070 	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
11071 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
11072 }
11073 
11074 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
11075 {
11076 	u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
11077 
11078 	/* check all HMMUs */
11079 	for (i = 0 ; i < num_of_hmmus ; i++) {
11080 		mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11081 
11082 		if (mmu_cap_mask & mmu_id)
11083 			gaudi2_ack_mmu_error(hdev, mmu_id);
11084 	}
11085 
11086 	/* check PMMU */
11087 	if (mmu_cap_mask & HW_CAP_PMMU)
11088 		gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11089 
11090 	return 0;
11091 }
11092 
11093 static void gaudi2_get_msi_info(__le32 *table)
11094 {
11095 	table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11096 }
11097 
11098 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11099 {
11100 	switch (pll_idx) {
11101 	case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11102 	case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11103 	case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11104 	case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11105 	case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11106 	case HL_GAUDI2_MME_PLL: return MME_PLL;
11107 	case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11108 	case HL_GAUDI2_IF_PLL: return IF_PLL;
11109 	case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11110 	case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11111 	case HL_GAUDI2_VID_PLL: return VID_PLL;
11112 	case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11113 	default: return -EINVAL;
11114 	}
11115 }
11116 
11117 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
11118 {
11119 	/* Not implemented */
11120 	return 0;
11121 }
11122 
11123 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
11124 {
11125 	/* Not implemented */
11126 	return 0;
11127 }
11128 
11129 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11130 				struct hl_device *hdev, struct hl_mon_state_dump *mon)
11131 {
11132 	/* Not implemented */
11133 	return 0;
11134 }
11135 
11136 
11137 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11138 				u64 status_base_offset, enum hl_sync_engine_type engine_type,
11139 				u32 engine_id, char **buf, size_t *size, size_t *offset)
11140 {
11141 	/* Not implemented */
11142 	return 0;
11143 }
11144 
11145 
11146 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11147 	.monitor_valid = gaudi2_monitor_valid,
11148 	.print_single_monitor = gaudi2_print_single_monitor,
11149 	.gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11150 	.print_fences_single_engine = gaudi2_print_fences_single_engine,
11151 };
11152 
11153 static void gaudi2_state_dump_init(struct hl_device *hdev)
11154 {
11155 	/* Not implemented */
11156 	hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11157 	hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11158 }
11159 
11160 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11161 {
11162 	return 0;
11163 }
11164 
11165 static u32 *gaudi2_get_stream_master_qid_arr(void)
11166 {
11167 	return NULL;
11168 }
11169 
11170 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11171 				struct attribute_group *dev_vrm_attr_grp)
11172 {
11173 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11174 	hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11175 }
11176 
11177 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11178 					u32 page_size, u32 *real_page_size, bool is_dram_addr)
11179 {
11180 	struct asic_fixed_properties *prop = &hdev->asic_prop;
11181 
11182 	/* for host pages the page size must be  */
11183 	if (!is_dram_addr) {
11184 		if (page_size % mmu_prop->page_size)
11185 			goto page_size_err;
11186 
11187 		*real_page_size = mmu_prop->page_size;
11188 		return 0;
11189 	}
11190 
11191 	if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11192 		goto page_size_err;
11193 
11194 	/*
11195 	 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
11196 	 * than DRAM page size).
11197 	 * for this reason work with the DRAM page size and let the MMU scrambling routine handle
11198 	 * this mismatch when calculating the address to place in the MMU page table.
11199 	 * (in that case also make sure that the dram_page_size is not greater than the
11200 	 * mmu page size)
11201 	 */
11202 	*real_page_size = prop->dram_page_size;
11203 
11204 	return 0;
11205 
11206 page_size_err:
11207 	dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11208 							page_size, mmu_prop->page_size >> 10);
11209 	return -EFAULT;
11210 }
11211 
11212 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11213 {
11214 	return -EOPNOTSUPP;
11215 }
11216 
11217 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11218 {
11219 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
11220 
11221 	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11222 		return 0;
11223 
11224 	return hl_fw_send_device_activity(hdev, open);
11225 }
11226 
11227 static const struct hl_asic_funcs gaudi2_funcs = {
11228 	.early_init = gaudi2_early_init,
11229 	.early_fini = gaudi2_early_fini,
11230 	.late_init = gaudi2_late_init,
11231 	.late_fini = gaudi2_late_fini,
11232 	.sw_init = gaudi2_sw_init,
11233 	.sw_fini = gaudi2_sw_fini,
11234 	.hw_init = gaudi2_hw_init,
11235 	.hw_fini = gaudi2_hw_fini,
11236 	.halt_engines = gaudi2_halt_engines,
11237 	.suspend = gaudi2_suspend,
11238 	.resume = gaudi2_resume,
11239 	.mmap = gaudi2_mmap,
11240 	.ring_doorbell = gaudi2_ring_doorbell,
11241 	.pqe_write = gaudi2_pqe_write,
11242 	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
11243 	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
11244 	.scrub_device_mem = gaudi2_scrub_device_mem,
11245 	.scrub_device_dram = gaudi2_scrub_device_dram,
11246 	.get_int_queue_base = NULL,
11247 	.test_queues = gaudi2_test_queues,
11248 	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
11249 	.asic_dma_pool_free = gaudi2_dma_pool_free,
11250 	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
11251 	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
11252 	.asic_dma_unmap_single = gaudi2_dma_unmap_single,
11253 	.asic_dma_map_single = gaudi2_dma_map_single,
11254 	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
11255 	.cs_parser = gaudi2_cs_parser,
11256 	.asic_dma_map_sgtable = hl_dma_map_sgtable,
11257 	.add_end_of_cb_packets = NULL,
11258 	.update_eq_ci = gaudi2_update_eq_ci,
11259 	.context_switch = gaudi2_context_switch,
11260 	.restore_phase_topology = gaudi2_restore_phase_topology,
11261 	.debugfs_read_dma = gaudi2_debugfs_read_dma,
11262 	.add_device_attr = gaudi2_add_device_attr,
11263 	.handle_eqe = gaudi2_handle_eqe,
11264 	.get_events_stat = gaudi2_get_events_stat,
11265 	.read_pte = NULL,
11266 	.write_pte = NULL,
11267 	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
11268 	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
11269 	.mmu_prefetch_cache_range = NULL,
11270 	.send_heartbeat = gaudi2_send_heartbeat,
11271 	.debug_coresight = gaudi2_debug_coresight,
11272 	.is_device_idle = gaudi2_is_device_idle,
11273 	.compute_reset_late_init = gaudi2_compute_reset_late_init,
11274 	.hw_queues_lock = gaudi2_hw_queues_lock,
11275 	.hw_queues_unlock = gaudi2_hw_queues_unlock,
11276 	.get_pci_id = gaudi2_get_pci_id,
11277 	.get_eeprom_data = gaudi2_get_eeprom_data,
11278 	.get_monitor_dump = gaudi2_get_monitor_dump,
11279 	.send_cpu_message = gaudi2_send_cpu_message,
11280 	.pci_bars_map = gaudi2_pci_bars_map,
11281 	.init_iatu = gaudi2_init_iatu,
11282 	.rreg = hl_rreg,
11283 	.wreg = hl_wreg,
11284 	.halt_coresight = gaudi2_halt_coresight,
11285 	.ctx_init = gaudi2_ctx_init,
11286 	.ctx_fini = gaudi2_ctx_fini,
11287 	.pre_schedule_cs = gaudi2_pre_schedule_cs,
11288 	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
11289 	.load_firmware_to_device = NULL,
11290 	.load_boot_fit_to_device = NULL,
11291 	.get_signal_cb_size = gaudi2_get_signal_cb_size,
11292 	.get_wait_cb_size = gaudi2_get_wait_cb_size,
11293 	.gen_signal_cb = gaudi2_gen_signal_cb,
11294 	.gen_wait_cb = gaudi2_gen_wait_cb,
11295 	.reset_sob = gaudi2_reset_sob,
11296 	.reset_sob_group = gaudi2_reset_sob_group,
11297 	.get_device_time = gaudi2_get_device_time,
11298 	.pb_print_security_errors = gaudi2_pb_print_security_errors,
11299 	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
11300 	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
11301 	.get_dec_base_addr = gaudi2_get_dec_base_addr,
11302 	.scramble_addr = gaudi2_mmu_scramble_addr,
11303 	.descramble_addr = gaudi2_mmu_descramble_addr,
11304 	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
11305 	.get_hw_block_id = gaudi2_get_hw_block_id,
11306 	.hw_block_mmap = gaudi2_block_mmap,
11307 	.enable_events_from_fw = gaudi2_enable_events_from_fw,
11308 	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
11309 	.get_msi_info = gaudi2_get_msi_info,
11310 	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
11311 	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
11312 	.init_firmware_loader = gaudi2_init_firmware_loader,
11313 	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
11314 	.state_dump_init = gaudi2_state_dump_init,
11315 	.get_sob_addr = &gaudi2_get_sob_addr,
11316 	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
11317 	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
11318 	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
11319 	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
11320 	.access_dev_mem = hl_access_dev_mem,
11321 	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
11322 	.set_engine_cores = gaudi2_set_engine_cores,
11323 	.set_engines = gaudi2_set_engines,
11324 	.send_device_activity = gaudi2_send_device_activity,
11325 	.set_dram_properties = gaudi2_set_dram_properties,
11326 	.set_binning_masks = gaudi2_set_binning_masks,
11327 };
11328 
11329 void gaudi2_set_asic_funcs(struct hl_device *hdev)
11330 {
11331 	hdev->asic_funcs = &gaudi2_funcs;
11332 }
11333